Code Example #1
File: wzoptim.py Project: xinmei9322/anglepy
def optim_vae_sfo(model,
                  x,
                  v_init,
                  w_init,
                  n_batch,
                  n_passes,
                  hook,
                  n_resample=20,
                  resample_keepmem=False,
                  bernoulli_x=False,
                  display=0):

    # Shuffle columns of dataset x
    ndict.shuffleCols(x)

    # create minibatches
    n_tot = x.itervalues().next().shape[1]
    minibatches = []
    n_minibatches = n_tot / n_batch
    if (n_tot % n_batch) != 0: raise Exception()

    # Divide into minibatches
    def make_minibatch(i):
        _x = ndict.getCols(x, i * n_batch, (i + 1) * n_batch)
        _eps = model.gen_eps(n_batch)
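        # Dynamic binarization: when bernoulli_x is set, resample binary pixels
        # from the grayscale intensities each time this minibatch is (re)built.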
        if bernoulli_x: _x['x'] = np.random.binomial(n=1, p=_x['x'])
        return [i, _x, _eps]

    for i in range(n_minibatches):
        minibatches.append(make_minibatch(i))

    L = [0.]
    n_L = [0]

    def f_df(w, minibatch):

        i_minibatch = minibatch[0]
        x_minibatch = minibatch[1]
        eps_minibatch = minibatch[2]

        # Get gradient
        logpx, logpz, logqz, gv, gw = model.dL_dw(w['v'], w['w'], x_minibatch,
                                                  eps_minibatch)

        # Get gradient w.r.t. priors
        logpv, logpw, gv_prior, gw_prior = model.dlogpw_dw(w['v'], w['w'])
        gv = {i: gv[i] + float(n_batch) / n_tot * gv_prior[i] for i in gv}
        gw = {i: gw[i] + float(n_batch) / n_tot * gw_prior[i] for i in gw}

        f = (logpx.sum() + logpz.sum() - logqz.sum())
        L[0] += -f / (1. * n_batch)
        n_L[0] += 1
        f += float(n_batch) / n_tot * logpv
        f += float(n_batch) / n_tot * logpw

        for i in gv:
            gv[i] *= -1. / n_batch
        for i in gw:
            gw[i] *= -1. / n_batch
        f *= -1. / n_batch

        #print 'norms gv:'
        #ndict.pNorm(gv)
        #print 'norms gw'
        #ndict.pNorm(gw)

        return f, {'v': gv, 'w': gw}

    w_init = {'v': v_init, 'w': w_init}

    from sfo import SFO
    optimizer = SFO(f_df, w_init, minibatches, display=display)

    #optimizer.check_grad()

    # loop
    for i in range(n_passes):
        w = optimizer.optimize(num_passes=1)
        LB = L[0] / (1. * n_L[0])
        hook(i, w['v'], w['w'], LB)
        L[0] = 0
        n_L[0] = 0
        # Reset noise epsilon of some minibatches
        for j in range(n_minibatches):
            if n_resample > 0 and i % n_resample == j % n_resample:
                minibatches[j] = make_minibatch(j)
                optimizer.replace_subfunction(j, resample_keepmem,
                                              minibatches[j])

    print "Finished!"
Code Example #2
File: gpulearn_yz_x.py Project: 2020zyc/nips14-ssl
def main(n_z, n_hidden, dataset, seed, gfx=True, _size=None):
    '''Learn a variational auto-encoder with generative model p(x,y,z)=p(y)p(z)p(x|y,z).
    x and y are (always) observed.
    I.e. this cannot be used for semi-supervised learning
    '''
    assert (type(n_hidden) == tuple or type(n_hidden) == list)
    assert type(n_z) == int
    assert isinstance(dataset, basestring)
    
    print 'gpulearn_yz_x', n_z, n_hidden, dataset, seed
    
    import time
    logdir = 'results/gpulearn_yz_x_'+dataset+'_'+str(n_z)+'-'+str(n_hidden)+'-'+str(int(time.time()))+'/'
    if not os.path.exists(logdir): os.makedirs(logdir)
    print 'logdir:', logdir
    
    np.random.seed(seed)
    
    # Init data
    if dataset == 'mnist':
        '''
        What works well:
        100-2-100 (Generated digits stay bit shady)
        1000-2-1000 (Needs pretty long training)
        '''
        import anglepy.data.mnist as mnist
        
        # MNIST
        size = 28
        train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(size, binarize_y=True)
        f_enc, f_dec = lambda x:x, lambda x:x
        x = {'x': train_x[:,:].astype(np.float32), 'y': train_y[:,:].astype(np.float32)}
        x_valid = {'x': valid_x.astype(np.float32), 'y': valid_y.astype(np.float32)}
        L_valid = 1
        dim_input = (size,size)
        n_x = size*size
        n_y = 10
        n_batch = 1000
        colorImg = False
        bernoulli_x = True
        byteToFloat = False
        mosaic_w = 5
        mosaic_h = 2
        type_px = 'bernoulli'

    elif dataset == 'norb':
        # resized NORB dataset, reshuffled
        import anglepy.data.norb as norb
        size = _size #48
        train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True)
        _x = {'x': train_x, 'y': train_y}
        ndict.shuffleCols(_x)
        train_x = _x['x']
        train_y = _x['y']
        
        # Do PCA
        f_enc, f_dec, pca_params = pp.PCA(_x['x'][:,:10000], cutoff=2000, toFloat=False)
        ndict.savez(pca_params, logdir+'pca_params')
        
        x = {'x': f_enc(train_x).astype(np.float32), 'y':train_y.astype(np.float32)}
        x_valid = {'x': f_enc(test_x).astype(np.float32), 'y':test_y.astype(np.float32)}
        
        L_valid = 1
        n_x = x['x'].shape[0]
        n_y = 5
        dim_input = (size,size)
        n_batch = 1000
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        mosaic_w = 5
        mosaic_h = 1
        type_px = 'gaussian'

    elif dataset == 'norb_instances': 
        # resized NORB dataset with the instances as classes
        import anglepy.data.norb2 as norb2
        size = _size #48
        x, y = norb2.load_numpy_subclasses(size, binarize_y=True)
        _x = {'x': x, 'y': y}
        ndict.shuffleCols(_x)
        
        # Do pre-processing
        if True:
            # Works
            f_enc, f_dec, pca_params = pp.PCA(_x['x'][:,:10000], cutoff=600, global_sd=True, toFloat=True)
            ndict.savez(pca_params, logdir+'pca_params')
        elif False:
            # Doesn't work
            f_enc, f_dec, pp_params = pp.normalize_noise(_x['x'][:,:50000], noise_sd=0.01, global_sd=True, toFloat=True)
        else:
            # Doesn't work
            f_enc, f_dec, params = pp.normalize_random(x=x[:,:10000], global_sd=True, toFloat=True)
            ndict.savez(params, logdir+'normalize_random_params')
        
        n_valid = 5000
        x = {'x': f_enc(_x['x'][:,:-n_valid]).astype(np.float32), 'y':_x['y'][:,:-n_valid].astype(np.float32)}
        # Validate on the held-out tail; the original [:,:n_valid] slice overlapped the training columns
        x_valid = {'x': f_enc(_x['x'][:,-n_valid:]).astype(np.float32), 'y':_x['y'][:,-n_valid:].astype(np.float32)}
        
        L_valid = 1
        n_x = x['x'].shape[0]
        n_y = 50
        dim_input = (size,size)
        n_batch = 5000
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        mosaic_w = 5
        mosaic_h = 1
        type_px = 'gaussian'

    elif dataset == 'svhn':    
        # SVHN dataset
        import anglepy.data.svhn as svhn
        size = 32
        train_x, train_y, test_x, test_y = svhn.load_numpy(False, binarize_y=True) #norb.load_resized(size, binarize_y=True)
        extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True)
        x = {'x': np.hstack((train_x, extra_x)), 'y':np.hstack((train_y, extra_y))}
        ndict.shuffleCols(x)
        
        #f_enc, f_dec, (x_sd, x_mean) = pp.preprocess_normalize01(train_x, True)
        f_enc, f_dec, pca_params = pp.PCA(x['x'][:,:10000], cutoff=1000, toFloat=True)
        ndict.savez(pca_params, logdir+'pca_params')
        
        n_y = 10
        x = {'x': f_enc(x['x']).astype(np.float32), 'y': x['y'].astype(np.float32)}
        x_valid = {'x': f_enc(test_x).astype(np.float32), 'y': test_y.astype(np.float32)}
        L_valid = 1
        n_x = x['x'].shape[0]
        dim_input = (size,size)
        n_batch = 5000
        colorImg = True
        bernoulli_x = False
        byteToFloat = False
        mosaic_w = 5
        mosaic_h = 2
        type_px = 'gaussian'
        
    # Init model
    n_hidden_q = n_hidden
    n_hidden_p = n_hidden
    from anglepy.models import GPUVAE_YZ_X
    updates = get_adam_optimizer(alpha=3e-4, beta1=0.9, beta2=0.999, weight_decay=0)
    model = GPUVAE_YZ_X(updates, n_x, n_y, n_hidden_q, n_z, n_hidden_p[::-1], 'softplus', 'softplus', type_px=type_px, type_qz='gaussianmarg', type_pz='gaussianmarg', prior_sd=1, uniform_y=True)
    
    if False:
        dir = '/home/ubuntu/results/gpulearn_yz_x_svhn_300-(500, 500)-1414094291/'
        dir = '/home/ubuntu/results/gpulearn_yz_x_svhn_300-(500, 500)-1414163488/'
        w = ndict.loadz(dir+'w_best.ndict.tar.gz')
        v = ndict.loadz(dir+'v_best.ndict.tar.gz')
        ndict.set_value(model.w, w)
        ndict.set_value(model.v, v)
    
    # Some statistics for optimization
    ll_valid_stats = [-1e99, 0]

    # Fixed sample for visualisation
    z_sample = {'z': np.repeat(np.random.standard_normal(size=(n_z, 12)), 12, axis=1).astype(np.float32)}
    y_sample = {'y': np.tile(np.random.multinomial(1, [1./n_y]*n_y, size=12).T, (1, 12))}
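    # The repeat/tile pair lays out a 12 x 12 Cartesian grid over 144 samples:
    # each of the 12 random z columns is paired with each of the 12 random
    # one-hot labels, so the 'samples_fixed' image varies y within blocks of constant z.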
    
    # Progress hook
    def hook(epoch, t, ll):
        
        if epoch%10 != 0:
            return
        
        ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat)
            
        if math.isnan(ll_valid):
            print "NaN detected. Reverting to saved best parameters"
            ndict.set_value(model.v, ndict.loadz(logdir+'v.ndict.tar.gz'))
            ndict.set_value(model.w, ndict.loadz(logdir+'w.ndict.tar.gz'))
            return
            
        if ll_valid > ll_valid_stats[0]:
            ll_valid_stats[0] = ll_valid
            ll_valid_stats[1] = 0
            ndict.savez(ndict.get_value(model.v), logdir+'v_best')
            ndict.savez(ndict.get_value(model.w), logdir+'w_best')
        else:
            ll_valid_stats[1] += 1
            # Early stopping (disabled here by the False guard): would stop
            # after 1000 consecutive validation checks without improvement
            if False and ll_valid_stats[1] > 1000:
                print "Finished"
                with open(logdir+'hook.txt', 'a') as f:
                    print >>f, "Finished"
                exit()

        # Log
        ndict.savez(ndict.get_value(model.v), logdir+'v')
        ndict.savez(ndict.get_value(model.w), logdir+'w')
        print epoch, t, ll, ll_valid
        with open(logdir+'hook.txt', 'a') as f:
            print >>f, t, ll, ll_valid
        
        if gfx:   
            # Graphics
            
            v = {i: model.v[i].get_value() for i in model.v}
            w = {i: model.w[i].get_value() for i in model.w}
                
            tail = '-'+str(epoch)+'.png'
            
            image = paramgraphics.mat_to_img(f_dec(v['w0x'][:].T), dim_input, True, colorImg=colorImg)
            image.save(logdir+'q_w0x'+tail, 'PNG')
            
            image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg)
            image.save(logdir+'out_w'+tail, 'PNG')
            
            _x = {'y': np.random.multinomial(1, [1./n_y]*n_y, size=144).T}
            _, _, _z_confab = model.gen_xz(_x, {}, n_batch=144)
            image = paramgraphics.mat_to_img(f_dec(_z_confab['x']), dim_input, colorImg=colorImg)
            image.save(logdir+'samples'+tail, 'PNG')
            
            _, _, _z_confab = model.gen_xz(y_sample, z_sample, n_batch=144)
            image = paramgraphics.mat_to_img(f_dec(_z_confab['x']), dim_input, colorImg=colorImg)
            image.save(logdir+'samples_fixed'+tail, 'PNG')
            
            if n_z == 2:
                
                import ImageFont
                import ImageDraw
                
                n_width = 10
                submosaic_offset = 15
                submosaic_width = (dim_input[1]*n_width)
                submosaic_height = (dim_input[0]*n_width)
                mosaic = Image.new("RGB", (submosaic_width*mosaic_w, submosaic_offset+submosaic_height*mosaic_h))
                
                for digit in range(0,n_y):
                    if digit >= mosaic_h*mosaic_w: continue
                    
                    _x = {}
                    n_batch_plot = n_width*n_width
                    _x['y'] = np.zeros((n_y,n_batch_plot))
                    _x['y'][digit,:] = 1
                    _z = {'z':np.zeros((2,n_width**2))}
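                    # Fill z with the centers of an n_width x n_width uniform
                    # grid mapped through the inverse Gaussian CDF, so the grid
                    # points are equiprobable under the N(0, I) prior.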
                    for i in range(0,n_width):
                        for j in range(0,n_width):
                            _z['z'][0,n_width*i+j] = scipy.stats.norm.ppf(float(i)/n_width+0.5/n_width)
                            _z['z'][1,n_width*i+j] = scipy.stats.norm.ppf(float(j)/n_width+0.5/n_width)
                    
                    _x, _, _z_confab = model.gen_xz(_x, _z, n_batch=n_batch_plot)
                    x_samples = _z_confab['x']
                    image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg, tile_spacing=(0,0))
                    
                    #image.save(logdir+'samples_digit_'+str(digit)+'_'+tail, 'PNG')
                    mosaic_x = (digit%mosaic_w)*submosaic_width
                    mosaic_y = submosaic_offset+int(digit/mosaic_w)*submosaic_height
                    mosaic.paste(image, (mosaic_x, mosaic_y))
                
                draw = ImageDraw.Draw(mosaic)
                draw.text((1,1),"Epoch #"+str(epoch)+" Loss="+str(int(ll)))
                    
                #plt.savefig(logdir+'mosaic'+tail, format='PNG')
                mosaic.save(logdir+'mosaic'+tail, 'PNG')
                
                #x_samples = _x['x']
                #image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg)
                #image.save(logdir+'samples2'+tail, 'PNG')
        
    # Optimize
    dostep = epoch_vae_adam(model, x, n_batch=n_batch, bernoulli_x=bernoulli_x, byteToFloat=byteToFloat)
    loop_va(dostep, hook)
    
    pass
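
A minimal standalone sketch (numpy/scipy only, not repository code) of the latent grid built in the digit loop above: push the centers of an n_width x n_width uniform grid on (0,1)^2 through the inverse Gaussian CDF, so the 2-D latent points cover equal-probability regions of the N(0, I) prior.

import numpy as np
import scipy.stats

n_width = 10
u = (np.arange(n_width) + 0.5) / n_width  # grid centers in (0, 1)
q = scipy.stats.norm.ppf(u)               # Gaussian quantiles
ii, jj = np.meshgrid(q, q, indexing='ij')
z = np.vstack([ii.ravel(), jj.ravel()])   # shape (2, n_width**2), same layout as _z['z']
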
Code Example #3
File: learn_yz_x_ss.py Project: 2020zyc/nips14-ssl
def optim_vae_ss_adam(alpha, model_qy, model, x_labeled, x_unlabeled, n_y, u_init, v_init, w_init, n_minibatches, n_passes, hook, n_reset=20, resample_keepmem=False, display=0):
    
    # Shuffle datasets
    ndict.shuffleCols(x_labeled)
    ndict.shuffleCols(x_unlabeled)
    
    # create minibatches
    minibatches = []

    n_labeled = x_labeled.itervalues().next().shape[1]
    n_batch_l = n_labeled / n_minibatches
    if (n_labeled%n_batch_l) != 0: raise Exception()
    
    n_unlabeled = x_unlabeled.itervalues().next().shape[1]
    n_batch_u = n_unlabeled / n_minibatches
    if (n_unlabeled%n_batch_u) != 0: raise Exception()
    
    n_tot = n_labeled + n_unlabeled

    # Divide into minibatches
    def make_minibatch(i):
        _x_labeled = ndict.getCols(x_labeled, i * n_batch_l, (i+1) * n_batch_l)
        _x_unlabeled = ndict.getCols(x_unlabeled, i * n_batch_u, (i+1) * n_batch_u)
        return [i, _x_labeled, _x_unlabeled]

    for i in range(n_minibatches):
        minibatches.append(make_minibatch(i))
    
    # For integrating-out approach
    L_inner = T.dmatrix()
    L_unlabeled = T.dot(np.ones((1, n_y)), model_qy.p * (L_inner - T.log(model_qy.p)))
    grad_L_unlabeled = T.grad(L_unlabeled.sum(), model_qy.var_w.values())
    f_du =  theano.function([model_qy.var_x['x']] + model_qy.var_w.values() + [model_qy.var_A, L_inner], [L_unlabeled] + grad_L_unlabeled)
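    # f_du evaluates the unlabeled-data bound U(x) = sum_y q(y|x) (L(x,y) - log q(y|x))
    # and its gradient w.r.t. the classifier weights; the per-label bounds
    # L(x,y) are passed in through the L_inner matrix.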
    
    # Some statistics
    L = [0.]
    n_L = [0]
    
    def f_df(w, minibatch):
        
        u = w['u']
        v = w['v']
        w = w['w']
        
        i_minibatch = minibatch[0]
        _x_l = minibatch[1] #labeled
        x_minibatch_l = {'x': np.random.normal(_x_l['mean'], np.exp(0.5*_x_l['logvar'])), 'y': _x_l['y']}
        eps_minibatch_l = model.gen_eps(n_batch_l)
        
        _x_u = minibatch[2] #unlabeled
        x_minibatch_u = {'x': np.random.normal(_x_u['mean'], np.exp(0.5*_x_u['logvar'])), 'y': _x_u['y']}
        eps_minibatch_u = [model.gen_eps(n_batch_u) for i in range(n_y)]
        
        # === Get gradient for labeled data
        # gradient of -KL(q(z|y,x) ~p(x,y) || p(x,y,z))
        logpx, logpz, logqz, gv_labeled, gw_labeled = model.dL_dw(v, w, x_minibatch_l, eps_minibatch_l)        
        # gradient of classification error E_{~p(x,y)}[q(y|x)]
        logqy, _, gu_labeled, _ = model_qy.dlogpxz_dwz(u, x_minibatch_l, {})
        
        # Reweight gu_labeled and logqy
        #beta = alpha / (1.-alpha) * (1. * n_unlabeled / n_labeled) #old
        beta = alpha * (1. * n_tot / n_labeled)
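        # With this beta, the classification term carries total weight
        # alpha * n_tot across the labeled set, independent of the
        # labeled/unlabeled split.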
        for i in u: gu_labeled[i] *= beta
        logqy *= beta
        
        L_labeled = logpx + logpz - logqz + logqy
        
        # === Get gradient for unlabeled data
        # -KL(q(z|x,y)q(y|x) ~p(x) || p(x,y,z))
        # Approach where outer expectation (over q(z|x,y)) is taken as explicit sum (instead of sampling)
        u = ndict.ordered(u)
        py = model_qy.dist_px['y'](*([x_minibatch_u['x']] + u.values() + [np.ones((1, n_batch_u))]))
        
        if True:
            # Original
            _L = np.zeros((n_y, n_batch_u))
            gv_unlabeled = {i: 0 for i in v}
            gw_unlabeled = {i: 0 for i in w}
            for label in range(n_y):
                new_y = np.zeros((n_y, n_batch_u))
                new_y[label,:] = 1
                eps = eps_minibatch_u[label]
                #logpx, logpz, logqz, _gv, _gw = model.dL_dw(v, w, {'x':x_minibatch['x'],'y':new_y}, eps)
                L_unweighted, L_weighted, _gv, _gw = model.dL_weighted_dw(v, w, {'x':x_minibatch_u['x'],'y':new_y}, eps, py[label:label+1,:])
                _L[label:label+1,:] = L_unweighted
                for i in v: gv_unlabeled[i] += _gv[i]
                for i in w: gw_unlabeled[i] += _gw[i]
        else:
            # New, should be more efficient. (But is not in practice)
            _y = np.zeros((n_y, n_batch_u*n_y))
            for label in range(n_y):
                _y[label,label*n_batch_u:(label+1)*n_batch_u] = 1
            _x = np.tile(x_minibatch_u['x'].astype(np.float32), (1, n_y))
            eps = model.gen_eps(n_batch_u*n_y)
            L_unweighted, L_weighted, gv_unlabeled, gw_unlabeled = model.dL_weighted_dw(v, w, {'x':_x,'y':_y}, eps, py.reshape((1, -1)))
            _L = L_unweighted.reshape((n_y, n_batch_u))
        
        r = f_du(*([x_minibatch_u['x']] + u.values() + [np.zeros((1, n_batch_u)), _L]))
        L_unlabeled = r[0]
        gu_unlabeled = dict(zip(u.keys(), r[1:]))
        
        # Get gradient of prior
        logpu, gu_prior = model_qy.dlogpw_dw(u)
        logpv, logpw, gv_prior, gw_prior = model.dlogpw_dw(v, w)
        
        # Combine gradients and objective
        gu = {i: ((gu_labeled[i] + gu_unlabeled[i]) * n_minibatches + gu_prior[i])/(-n_tot) for i in u}
        gv = {i: ((gv_labeled[i] + gv_unlabeled[i]) * n_minibatches + gv_prior[i])/(-n_tot) for i in v}
        gw = {i: ((gw_labeled[i] + gw_unlabeled[i]) * n_minibatches + gw_prior[i])/(-n_tot) for i in w}
        f = ((L_labeled.sum() + L_unlabeled.sum()) * n_minibatches + logpu + logpv + logpw)/(-n_tot)
        
        L[0] += ((L_labeled.sum() + L_unlabeled.sum()) * n_minibatches + logpu + logpv + logpw)/(-n_tot)
        n_L[0] += 1
        
        #ndict.pNorm(gu_unlabeled)
        
        return f, {'u': gu, 'v':gv, 'w':gw}
    
    w_init = {'u': u_init, 'v':v_init, 'w':w_init}
    
    optimizer = AdaM(f_df, w_init, minibatches, alpha=3e-4, beta1=0.9, beta2=0.999)
    
    for i in range(n_passes):
        w = optimizer.optimize(num_passes=1)
        LB = L[0]/(1.*n_L[0])
        testset_error = hook(i, w['u'], w['v'], w['w'], LB)
        L[0] = 0
        n_L[0] = 0
    
    return testset_error
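
A minimal numpy sketch (generic names, not repository code) of the integrating-out bound that the Theano graph for L_unlabeled computes above: with per-label lower bounds L[y, n] for unlabeled point x_n and classifier probabilities py[y, n] = q(y|x_n), sum over the label dimension.

import numpy as np

def unlabeled_bound(L, py):
    # L, py: arrays of shape (n_y, n_batch); returns shape (n_batch,)
    # U(x_n) = sum_y q(y|x_n) * (L(x_n, y) - log q(y|x_n))
    return np.sum(py * (L - np.log(py)), axis=0)
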
Code Example #4
def optim_vae_ss_adam(alpha,
                      model_qy,
                      model,
                      x_labeled,
                      x_unlabeled,
                      n_y,
                      u_init,
                      v_init,
                      w_init,
                      n_minibatches,
                      n_passes,
                      hook,
                      n_reset=20,
                      resample_keepmem=False,
                      display=0):

    # Shuffle datasets
    ndict.shuffleCols(x_labeled)
    ndict.shuffleCols(x_unlabeled)

    # create minibatches
    minibatches = []

    n_labeled = next(iter(x_labeled.values())).shape[1]
    n_batch_l = n_labeled // n_minibatches
    if (n_labeled % n_batch_l) != 0: raise Exception()

    n_unlabeled = next(iter(x_unlabeled.values())).shape[1]
    n_batch_u = n_unlabeled // n_minibatches
    if (n_unlabeled % n_batch_u) != 0: raise Exception()

    n_tot = n_labeled + n_unlabeled

    # Divide into minibatches
    def make_minibatch(i):
        _x_labeled = ndict.getCols(x_labeled, i * n_batch_l,
                                   (i + 1) * n_batch_l)
        _x_unlabeled = ndict.getCols(x_unlabeled, i * n_batch_u,
                                     (i + 1) * n_batch_u)
        return [i, _x_labeled, _x_unlabeled]

    for i in range(n_minibatches):
        minibatches.append(make_minibatch(i))

    # For integrating-out approach
    L_inner = T.dmatrix()
    L_unlabeled = T.dot(np.ones((1, n_y)),
                        model_qy.p * (L_inner - T.log(model_qy.p)))
    grad_L_unlabeled = T.grad(L_unlabeled.sum(), list(model_qy.var_w.values()))
    f_du = theano.function(
        [model_qy.var_x['x']] + list(model_qy.var_w.values()) +
        [model_qy.var_A, L_inner], [L_unlabeled] + grad_L_unlabeled)

    # Some statistics
    L = [0.]
    n_L = [0]

    def f_df(w, minibatch):

        u = w['u']
        v = w['v']
        w = w['w']

        i_minibatch = minibatch[0]
        _x_l = minibatch[1]  #labeled
        x_minibatch_l = {
            'x': np.random.normal(_x_l['mean'], np.exp(0.5 * _x_l['logvar'])),
            'y': _x_l['y']
        }
        eps_minibatch_l = model.gen_eps(n_batch_l)

        _x_u = minibatch[2]  #unlabeled
        x_minibatch_u = {
            'x': np.random.normal(_x_u['mean'], np.exp(0.5 * _x_u['logvar'])),
            'y': _x_u['y']
        }
        eps_minibatch_u = [model.gen_eps(n_batch_u) for i in range(n_y)]

        # === Get gradient for labeled data
        # gradient of -KL(q(z|y,x) ~p(x,y) || p(x,y,z))
        logpx, logpz, logqz, gv_labeled, gw_labeled = model.dL_dw(
            v, w, x_minibatch_l, eps_minibatch_l)
        # gradient of classification error E_{~p(x,y)}[q(y|x)]
        logqy, _, gu_labeled, _ = model_qy.dlogpxz_dwz(u, x_minibatch_l, {})

        # Reweight gu_labeled and logqy
        #beta = alpha / (1.-alpha) * (1. * n_unlabeled / n_labeled) #old
        beta = alpha * (1. * n_tot / n_labeled)
        for i in u:
            gu_labeled[i] *= beta
        logqy *= beta

        L_labeled = logpx + logpz - logqz + logqy

        # === Get gradient for unlabeled data
        # -KL(q(z|x,y)q(y|x) ~p(x) || p(x,y,z))
        # Approach where outer expectation (over q(z|x,y)) is taken as explicit sum (instead of sampling)
        u = ndict.ordered(u)
        py = model_qy.dist_px['y'](*([x_minibatch_u['x']] + list(u.values()) +
                                     [np.ones((1, n_batch_u))]))

        if True:
            # Original
            _L = np.zeros((n_y, n_batch_u))
            gv_unlabeled = {i: 0 for i in v}
            gw_unlabeled = {i: 0 for i in w}
            for label in range(n_y):
                new_y = np.zeros((n_y, n_batch_u))
                new_y[label, :] = 1
                eps = eps_minibatch_u[label]
                #logpx, logpz, logqz, _gv, _gw = model.dL_dw(v, w, {'x':x_minibatch['x'],'y':new_y}, eps)
                L_unweighted, L_weighted, _gv, _gw = model.dL_weighted_dw(
                    v, w, {
                        'x': x_minibatch_u['x'],
                        'y': new_y
                    }, eps, py[label:label + 1, :])
                _L[label:label + 1, :] = L_unweighted
                for i in v:
                    gv_unlabeled[i] += _gv[i]
                for i in w:
                    gw_unlabeled[i] += _gw[i]
        else:
            # New, should be more efficient. (But is not in practice)
            _y = np.zeros((n_y, n_batch_u * n_y))
            for label in range(n_y):
                _y[label, label * n_batch_u:(label + 1) * n_batch_u] = 1
            _x = np.tile(x_minibatch_u['x'].astype(np.float32), (1, n_y))
            eps = model.gen_eps(n_batch_u * n_y)
            L_unweighted, L_weighted, gv_unlabeled, gw_unlabeled = model.dL_weighted_dw(
                v, w, {
                    'x': _x,
                    'y': _y
                }, eps, py.reshape((1, -1)))
            _L = L_unweighted.reshape((n_y, n_batch_u))

        r = f_du(*([x_minibatch_u['x']] + list(u.values()) +
                   [np.zeros((1, n_batch_u)), _L]))
        L_unlabeled = r[0]
        gu_unlabeled = dict(zip(u.keys(), r[1:]))

        # Get gradient of prior
        logpu, gu_prior = model_qy.dlogpw_dw(u)
        logpv, logpw, gv_prior, gw_prior = model.dlogpw_dw(v, w)

        # Combine gradients and objective
        gu = {
            i:
            ((gu_labeled[i] + gu_unlabeled[i]) * n_minibatches + gu_prior[i]) /
            (-n_tot)
            for i in u
        }
        gv = {
            i:
            ((gv_labeled[i] + gv_unlabeled[i]) * n_minibatches + gv_prior[i]) /
            (-n_tot)
            for i in v
        }
        gw = {
            i:
            ((gw_labeled[i] + gw_unlabeled[i]) * n_minibatches + gw_prior[i]) /
            (-n_tot)
            for i in w
        }
        f = ((L_labeled.sum() + L_unlabeled.sum()) * n_minibatches + logpu +
             logpv + logpw) / (-n_tot)

        L[0] += ((L_labeled.sum() + L_unlabeled.sum()) * n_minibatches +
                 logpu + logpv + logpw) / (-n_tot)
        n_L[0] += 1

        #ndict.pNorm(gu_unlabeled)

        return f, {'u': gu, 'v': gv, 'w': gw}

    w_init = {'u': u_init, 'v': v_init, 'w': w_init}

    optimizer = AdaM(f_df,
                     w_init,
                     minibatches,
                     alpha=3e-4,
                     beta1=0.9,
                     beta2=0.999)

    for i in range(n_passes):
        w = optimizer.optimize(num_passes=1)
        LB = L[0] / (1. * n_L[0])
        testset_error = hook(i, w['u'], w['v'], w['w'], LB)
        L[0] = 0
        n_L[0] = 0

    return testset_error
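
A minimal numpy sketch of a single Adam update (Kingma & Ba, 2014), with the same hyperparameters the AdaM optimizer above is constructed with; the names here are generic, not the anglepy implementation.

import numpy as np

def adam_step(theta, g, m, v, t, alpha=3e-4, beta1=0.9, beta2=0.999, eps=1e-8):
    # t is the 1-based step count; m and v are running moment estimates.
    m = beta1 * m + (1 - beta1) * g       # first moment (mean of gradients)
    v = beta2 * v + (1 - beta2) * g**2    # second moment (uncentered variance)
    m_hat = m / (1 - beta1**t)            # bias correction for the warm-up phase
    v_hat = v / (1 - beta2**t)
    theta = theta - alpha * m_hat / (np.sqrt(v_hat) + eps)
    return theta, m, v
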
Code Example #5
File: gpulearn_z_x.py Project: candy4869/2014
def main(n_z, n_hidden, dataset, seed, comment, gfx=True):

    # Initialize logdir
    import time
    logdir = 'results/gpulearn_z_x_' + dataset + '_' + str(n_z) + '-' + str(
        n_hidden) + '_' + comment + '_' + str(int(time.time())) + '/'
    if not os.path.exists(logdir): os.makedirs(logdir)
    print 'logdir:', logdir
    print 'gpulearn_z_x', n_z, n_hidden, dataset, seed
    with open(logdir + 'hook.txt', 'a') as f:
        print >> f, 'learn_z_x', n_z, n_hidden, dataset, seed

    np.random.seed(seed)

    gfx_freq = 1

    weight_decay = 0
    f_enc, f_dec = lambda x: x, lambda x: x

    # Init data
    if dataset == 'mnist':
        import anglepy.data.mnist as mnist

        # MNIST
        size = 28
        train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(
            size)
        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': valid_x.astype(np.float32)}
        x_test = {'x': test_x.astype(np.float32)}
        L_valid = 1
        dim_input = (size, size)
        n_x = size * size
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        nonlinear = 'softplus'
        type_px = 'bernoulli'
        n_train = 50000
        n_batch = 1000
        colorImg = False
        bernoulli_x = True
        byteToFloat = False
        weight_decay = float(n_batch) / n_train

    elif dataset == 'mnist_binarized':
        import anglepy.data.mnist_binarized as mnist_binarized
        # MNIST
        train_x, valid_x, test_x = mnist_binarized.load_numpy(28)
        x = {'x': np.hstack((train_x, valid_x)).astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        dim_input = (28, 28)
        n_x = 28 * 28
        n_y = 10
        type_qz = 'gaussianmarg'
        type_pz = 'mog'
        nonlinear = 'rectlin'
        type_px = 'bernoulli'
        n_train = 60000
        n_batch = 1000
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch) / n_train

    elif dataset == 'freyface':
        # Frey's face
        import anglepy.data.freyface as freyface
        n_train = 1600
        train_x = freyface.load_numpy()
        np.random.shuffle(train_x)
        x = {'x': train_x.T[:, 0:n_train]}
        x_valid = {'x': train_x.T[:, n_train:]}
        L_valid = 1
        dim_input = (28, 20)
        n_x = 20 * 28
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'bounded01'
        nonlinear = 'tanh'  #tanh works better with freyface #'softplus'
        n_batch = 100
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch) / n_train

    elif dataset == 'freyface_pca':
        # Frey's face
        import anglepy.data.freyface as freyface
        n_train = 1600
        train_x = freyface.load_numpy().T
        np.random.shuffle(train_x.T)

        f_enc, f_dec, _ = pp.PCA(train_x, 0.99)
        train_x = f_enc(train_x)

        x = {'x': train_x[:, 0:n_train].astype(np.float32)}
        x_valid = {'x': train_x[:, n_train:].astype(np.float32)}
        L_valid = 1
        dim_input = (28, 20)
        n_x = train_x.shape[0]
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 100
        colorImg = False
        bernoulli_x = False
        byteToFloat = False

    elif dataset == 'freyface_bernoulli':
        # Frey's face
        import anglepy.data.freyface as freyface
        n_train = 1600
        train_x = freyface.load_numpy().T
        np.random.shuffle(train_x.T)

        x = {'x': train_x[:, 0:n_train].astype(np.float32)}
        x_valid = {'x': train_x[:, n_train:].astype(np.float32)}
        L_valid = 1
        dim_input = (28, 20)
        n_x = train_x.shape[0]
        type_pz = 'gaussianmarg'
        type_px = 'bernoulli'
        nonlinear = 'softplus'
        n_batch = 100
        colorImg = False
        bernoulli_x = False
        byteToFloat = False

    elif dataset == 'norb':
        # small NORB dataset
        import anglepy.data.norb as norb
        size = 48
        train_x, train_y, test_x, test_y = norb.load_resized(size,
                                                             binarize_y=True)

        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        n_x = train_x.shape[0]
        dim_input = (size, size)
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 900  #23400/900 = 27
        colorImg = False
        #binarize = False
        byteToFloat = False
        bernoulli_x = False
        weight_decay = float(n_batch) / train_x.shape[1]

    elif dataset == 'norb_pca':
        # small NORB dataset
        import anglepy.data.norb as norb
        size = 48
        train_x, train_y, test_x, test_y = norb.load_resized(size,
                                                             binarize_y=True)

        f_enc, f_dec, _ = pp.PCA(train_x, 0.999)
        #f_enc, f_dec, _ = pp.normalize_random(train_x)
        train_x = f_enc(train_x)
        test_x = f_enc(test_x)

        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        n_x = train_x.shape[0]
        dim_input = (size, size)
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 900  #23400/900 = 27
        colorImg = False
        #binarize = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch) / train_x.shape[1]

    elif dataset == 'norb_normalized':
        # small NORB dataset
        import anglepy.data.norb as norb
        size = 48
        train_x, train_y, test_x, test_y = norb.load_resized(size,
                                                             binarize_y=True)

        #f_enc, f_dec, _ = pp.PCA(train_x, 0.99)
        #f_enc, f_dec, _ = pp.normalize_random(train_x)
        f_enc, f_dec, _ = pp.normalize(train_x)
        train_x = f_enc(train_x)
        test_x = f_enc(test_x)

        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        n_x = train_x.shape[0]
        dim_input = (size, size)
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 900  #23400/900 = 27
        colorImg = False
        #binarize = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch) / train_x.shape[1]

    elif dataset == 'svhn':
        # SVHN dataset
        import anglepy.data.svhn as svhn
        size = 32
        train_x, train_y, test_x, test_y = svhn.load_numpy(
            False, binarize_y=True)  #norb.load_resized(size, binarize_y=True)
        extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True)
        x = {
            'x': np.hstack((train_x, extra_x)),
            'y': np.hstack((train_y, extra_y))
        }
        ndict.shuffleCols(x)

        print 'Performing PCA, can take a few minutes... ',
        f_enc, f_dec, pca_params = pp.PCA(x['x'][:, :10000],
                                          cutoff=600,
                                          toFloat=True)
        ndict.savez(pca_params, logdir + 'pca_params')
        print 'Done.'

        n_y = 10
        x = {'x': f_enc(x['x']).astype(np.float32)}
        x_valid = {'x': f_enc(test_x).astype(np.float32)}
        L_valid = 1
        n_x = x['x'].shape[0]
        dim_input = (size, size)
        n_batch = 5000
        colorImg = True
        bernoulli_x = False
        byteToFloat = False
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'

    # Construct model
    from anglepy.models import GPUVAE_Z_X
    updates = get_adam_optimizer(learning_rate=3e-4, weight_decay=weight_decay)
    model = GPUVAE_Z_X(updates,
                       n_x,
                       n_hidden,
                       n_z,
                       n_hidden[::-1],
                       nonlinear,
                       nonlinear,
                       type_px,
                       type_qz=type_qz,
                       type_pz=type_pz,
                       prior_sd=100,
                       init_sd=1e-3)

    if False:
        #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412689061/'
        #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412676966/'
        #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412695481/'
        #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412695455/'
        #dir = '/Users/dpkingma/results/gpulearn_z_x_svhn_pca_300-(500, 500)__1413904756/'
        dir = '/home/ubuntu/results/gpulearn_z_x_mnist_50-[500, 500]__1414259423/'
        w = ndict.loadz(dir + 'w_best.ndict.tar.gz')
        v = ndict.loadz(dir + 'v_best.ndict.tar.gz')
        ndict.set_value(model.w, w)
        ndict.set_value(model.v, v)

    # Some statistics for optimization
    ll_valid_stats = [-1e99, 0]

    # Progress hook
    def hook(epoch, t, ll):

        if epoch % 10 != 0: return

        ll_valid, _ = model.est_loglik(x_valid,
                                       n_samples=L_valid,
                                       n_batch=n_batch,
                                       byteToFloat=byteToFloat)

        # Log
        ndict.savez(ndict.get_value(model.v), logdir + 'v')
        ndict.savez(ndict.get_value(model.w), logdir + 'w')

        if ll_valid > ll_valid_stats[0]:
            ll_valid_stats[0] = ll_valid
            ll_valid_stats[1] = 0
            ndict.savez(ndict.get_value(model.v), logdir + 'v_best')
            ndict.savez(ndict.get_value(model.w), logdir + 'w_best')
        else:
            ll_valid_stats[1] += 1
            # Stop when validation performance has not improved in 1000 consecutive checks
            if ll_valid_stats[1] > 1000:
                print "Finished"
                with open(logdir + 'hook.txt', 'a') as f:
                    print >> f, "Finished"
                exit()

        print epoch, t, ll, ll_valid, ll_valid_stats
        with open(logdir + 'hook.txt', 'a') as f:
            print >> f, epoch, t, ll, ll_valid, ll_valid_stats

        # Graphics
        if gfx and epoch % gfx_freq == 0:

            #tail = '.png'
            tail = '-' + str(epoch) + '.png'

            v = {i: model.v[i].get_value() for i in model.v}
            w = {i: model.w[i].get_value() for i in model.w}

            if 'pca' not in dataset and 'random' not in dataset and 'normalized' not in dataset:

                if 'w0' in v:
                    image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T),
                                                     dim_input,
                                                     True,
                                                     colorImg=colorImg)
                    image.save(logdir + 'q_w0' + tail, 'PNG')

                image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]),
                                                 dim_input,
                                                 True,
                                                 colorImg=colorImg)
                image.save(logdir + 'out_w' + tail, 'PNG')

                if 'out_unif' in w:
                    image = paramgraphics.mat_to_img(f_dec(
                        w['out_unif'].reshape((-1, 1))),
                                                     dim_input,
                                                     True,
                                                     colorImg=colorImg)
                    image.save(logdir + 'out_unif' + tail, 'PNG')

                if n_z == 2:
                    n_width = 10
                    import scipy.stats
                    z = {'z': np.zeros((2, n_width**2))}
                    for i in range(0, n_width):
                        for j in range(0, n_width):
                            z['z'][0, n_width * i + j] = scipy.stats.norm.ppf(
                                float(i) / n_width + 0.5 / n_width)
                            z['z'][1, n_width * i + j] = scipy.stats.norm.ppf(
                                float(j) / n_width + 0.5 / n_width)

                    _x, _, _z = model.gen_xz({}, z, n_width**2)
                    if dataset == 'mnist':
                        # invert grayscale so digits render dark-on-light
                        _z['x'] = 1 - _z['x']
                    image = paramgraphics.mat_to_img(f_dec(_z['x']), dim_input)
                    image.save(logdir + '2dmanifold' + tail, 'PNG')
                else:
                    _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144)
                    x_samples = _z_confab['x']
                    image = paramgraphics.mat_to_img(f_dec(x_samples),
                                                     dim_input,
                                                     colorImg=colorImg)
                    image.save(logdir + 'samples' + tail, 'PNG')

                    #x_samples = _x['x']
                    #image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg)
                    #image.save(logdir+'samples2'+tail, 'PNG')

            else:
                # Model with preprocessing

                if 'w0' in v:
                    image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T),
                                                     dim_input,
                                                     True,
                                                     colorImg=colorImg)
                    image.save(logdir + 'q_w0' + tail, 'PNG')

                image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]),
                                                 dim_input,
                                                 True,
                                                 colorImg=colorImg)
                image.save(logdir + 'out_w' + tail, 'PNG')

                _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144)
                x_samples = f_dec(_z_confab['x'])
                x_samples = np.minimum(np.maximum(x_samples, 0), 1)
                image = paramgraphics.mat_to_img(x_samples,
                                                 dim_input,
                                                 colorImg=colorImg)
                image.save(logdir + 'samples' + tail, 'PNG')

    # Optimize
    #SFO
    dostep = epoch_vae_adam(model,
                            x,
                            n_batch=n_batch,
                            bernoulli_x=bernoulli_x,
                            byteToFloat=byteToFloat)
    loop_va(dostep, hook)

    pass
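
A minimal sketch (generic names, not repository code) of the early-stopping bookkeeping that the two-element list ll_valid_stats implements in hook() above: track the best validation score and a staleness counter, reset the counter on improvement, and stop once it exceeds the patience.

class EarlyStopper:
    def __init__(self, patience=1000):
        self.best = -1e99
        self.stale = 0
        self.patience = patience

    def update(self, score):
        if score > self.best:
            self.best, self.stale = score, 0
            return False                   # improved: keep training
        self.stale += 1
        return self.stale > self.patience  # True means stop
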
Code Example #6
File: gpulearn_z_x.py Project: 2020zyc/nips14-ssl
def main(n_z, n_hidden, dataset, seed, comment, gfx=True):
    
    # Initialize logdir
    import time
    logdir = 'results/gpulearn_z_x_'+dataset+'_'+str(n_z)+'-'+str(n_hidden)+'_'+comment+'_'+str(int(time.time()))+'/'
    if not os.path.exists(logdir): os.makedirs(logdir)
    print 'logdir:', logdir
    print 'gpulearn_z_x', n_z, n_hidden, dataset, seed
    with open(logdir+'hook.txt', 'a') as f:
        print >>f, 'learn_z_x', n_z, n_hidden, dataset, seed
    
    np.random.seed(seed)

    gfx_freq = 1
    
    weight_decay = 0
    f_enc, f_dec = lambda x:x, lambda x:x

    # Init data
    if dataset == 'mnist':
        import anglepy.data.mnist as mnist
        
        # MNIST
        size = 28
        train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(size)
        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': valid_x.astype(np.float32)}
        x_test = {'x': test_x.astype(np.float32)}
        L_valid = 1
        dim_input = (size,size)
        n_x = size*size
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        nonlinear = 'softplus'
        type_px = 'bernoulli'
        n_train = 50000
        n_batch = 1000
        colorImg = False
        bernoulli_x = True
        byteToFloat = False
        weight_decay = float(n_batch)/n_train
        
    elif dataset == 'mnist_binarized':
        import anglepy.data.mnist_binarized as mnist_binarized
        # MNIST
        train_x, valid_x, test_x = mnist_binarized.load_numpy(28)
        x = {'x': np.hstack((train_x, valid_x)).astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        dim_input = (28,28)
        n_x = 28*28
        n_y = 10
        type_qz = 'gaussianmarg'
        type_pz = 'mog'
        nonlinear = 'rectlin'
        type_px = 'bernoulli'
        n_train = 60000
        n_batch = 1000
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch)/n_train
        
    elif dataset == 'freyface':
        # Frey's face
        import anglepy.data.freyface as freyface
        n_train = 1600
        train_x = freyface.load_numpy()
        np.random.shuffle(train_x)
        x = {'x': train_x.T[:,0:n_train]}
        x_valid = {'x': train_x.T[:,n_train:]}
        L_valid = 1
        dim_input = (28,20)
        n_x = 20*28
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'bounded01'
        nonlinear = 'tanh'  #tanh works better with freyface #'softplus'
        n_batch = 100
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch)/n_train

    elif dataset == 'freyface_pca':
        # Frey's face
        import anglepy.data.freyface as freyface
        n_train = 1600
        train_x = freyface.load_numpy().T
        np.random.shuffle(train_x.T)
        
        f_enc, f_dec, _ = pp.PCA(train_x, 0.99)
        train_x = f_enc(train_x)
        
        x = {'x': train_x[:,0:n_train].astype(np.float32)}
        x_valid = {'x': train_x[:,n_train:].astype(np.float32)}
        L_valid = 1
        dim_input = (28,20)
        n_x = train_x.shape[0]
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 100
        colorImg = False
        bernoulli_x = False
        byteToFloat = False

    elif dataset == 'freyface_bernoulli':
        # Frey's face
        import anglepy.data.freyface as freyface
        n_train = 1600
        train_x = freyface.load_numpy().T
        np.random.shuffle(train_x.T)
        
        x = {'x': train_x[:,0:n_train].astype(np.float32)}
        x_valid = {'x': train_x[:,n_train:].astype(np.float32)}
        L_valid = 1
        dim_input = (28,20)
        n_x = train_x.shape[0]
        type_pz = 'gaussianmarg'
        type_px = 'bernoulli'
        nonlinear = 'softplus'
        n_batch = 100
        colorImg = False
        bernoulli_x = False
        byteToFloat = False

    elif dataset == 'norb':    
        # small NORB dataset
        import anglepy.data.norb as norb
        size = 48
        train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True)

        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        n_x = train_x.shape[0]
        dim_input = (size,size)
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 900 #23400/900 = 27
        colorImg = False
        #binarize = False
        byteToFloat = False
        bernoulli_x = False
        weight_decay= float(n_batch)/train_x.shape[1]
    
    elif dataset == 'norb_pca':    
        # small NORB dataset
        import anglepy.data.norb as norb
        size = 48
        train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True)

        f_enc, f_dec, _ = pp.PCA(train_x, 0.999)
        #f_enc, f_dec, _ = pp.normalize_random(train_x)
        train_x = f_enc(train_x)
        test_x = f_enc(test_x)
        
        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        n_x = train_x.shape[0]
        dim_input = (size,size)
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 900 #23400/900 = 27
        colorImg = False
        #binarize = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay= float(n_batch)/train_x.shape[1]

    elif dataset == 'norb_normalized':
        # small NORB dataset
        import anglepy.data.norb as norb
        size = 48
        train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True)

        #f_enc, f_dec, _ = pp.PCA(train_x, 0.99)
        #f_enc, f_dec, _ = pp.normalize_random(train_x)
        f_enc, f_dec, _ = pp.normalize(train_x)
        train_x = f_enc(train_x)
        test_x = f_enc(test_x)
        
        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        n_x = train_x.shape[0]
        dim_input = (size,size)
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 900 #23400/900 = 27
        colorImg = False
        #binarize = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay= float(n_batch)/train_x.shape[1]
        
    elif dataset == 'svhn':
        # SVHN dataset
        import anglepy.data.svhn as svhn
        size = 32
        train_x, train_y, test_x, test_y = svhn.load_numpy(False, binarize_y=True) #norb.load_resized(size, binarize_y=True)
        extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True)
        x = {'x': np.hstack((train_x, extra_x)), 'y':np.hstack((train_y, extra_y))}
        ndict.shuffleCols(x)
        
        print 'Performing PCA, can take a few minutes... ',
        f_enc, f_dec, pca_params = pp.PCA(x['x'][:,:10000], cutoff=600, toFloat=True)
        ndict.savez(pca_params, logdir+'pca_params')
        print 'Done.'
        
        n_y = 10
        x = {'x': f_enc(x['x']).astype(np.float32)}
        x_valid = {'x': f_enc(test_x).astype(np.float32)}
        L_valid = 1
        n_x = x['x'].shape[0]
        dim_input = (size,size)
        n_batch = 5000
        colorImg = True
        bernoulli_x = False
        byteToFloat = False
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
    
        
    # Construct model
    from anglepy.models import GPUVAE_Z_X
    updates = get_adam_optimizer(learning_rate=3e-4, weight_decay=weight_decay)
    model = GPUVAE_Z_X(updates, n_x, n_hidden, n_z, n_hidden[::-1], nonlinear, nonlinear, type_px, type_qz=type_qz, type_pz=type_pz, prior_sd=100, init_sd=1e-3)
    
    if False:
        #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412689061/'
        #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412676966/'
        #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412695481/'
        #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412695455/'
        #dir = '/Users/dpkingma/results/gpulearn_z_x_svhn_pca_300-(500, 500)__1413904756/'
        dir = '/home/ubuntu/results/gpulearn_z_x_mnist_50-[500, 500]__1414259423/'
        w = ndict.loadz(dir+'w_best.ndict.tar.gz')
        v = ndict.loadz(dir+'v_best.ndict.tar.gz')
        ndict.set_value(model.w, w)
        ndict.set_value(model.v, v)
    
    # Some statistics for optimization
    ll_valid_stats = [-1e99, 0]
    
    # Progress hook
    def hook(epoch, t, ll):
        
        if epoch%10 != 0: return
        
        ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat)
        
        # Log
        ndict.savez(ndict.get_value(model.v), logdir+'v')
        ndict.savez(ndict.get_value(model.w), logdir+'w')
        
        if ll_valid > ll_valid_stats[0]:
            ll_valid_stats[0] = ll_valid
            ll_valid_stats[1] = 0
            ndict.savez(ndict.get_value(model.v), logdir+'v_best')
            ndict.savez(ndict.get_value(model.w), logdir+'w_best')
        else:
            ll_valid_stats[1] += 1
            # Stop when validation performance has not improved in 1000 consecutive checks
            if ll_valid_stats[1] > 1000:
                print "Finished"
                with open(logdir+'hook.txt', 'a') as f:
                    print >>f, "Finished"
                exit()
        
        print epoch, t, ll, ll_valid, ll_valid_stats
        with open(logdir+'hook.txt', 'a') as f:
            print >>f, epoch, t, ll, ll_valid, ll_valid_stats

        # Graphics
        if gfx and epoch%gfx_freq == 0:
            
            #tail = '.png'
            tail = '-'+str(epoch)+'.png'
            
            v = {i: model.v[i].get_value() for i in model.v}
            w = {i: model.w[i].get_value() for i in model.w}
                
            if 'pca' not in dataset and 'random' not in dataset and 'normalized' not in dataset:
                
                if 'w0' in v:
                    image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg)
                    image.save(logdir+'q_w0'+tail, 'PNG')
                
                image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg)
                image.save(logdir+'out_w'+tail, 'PNG')
                
                if 'out_unif' in w:
                    image = paramgraphics.mat_to_img(f_dec(w['out_unif'].reshape((-1,1))), dim_input, True, colorImg=colorImg)
                    image.save(logdir+'out_unif'+tail, 'PNG')
                
                if n_z == 2:
                    n_width = 10
                    import scipy.stats
                    z = {'z':np.zeros((2,n_width**2))}
                    for i in range(0,n_width):
                        for j in range(0,n_width):
                            z['z'][0,n_width*i+j] = scipy.stats.norm.ppf(float(i)/n_width+0.5/n_width)
                            z['z'][1,n_width*i+j] = scipy.stats.norm.ppf(float(j)/n_width+0.5/n_width)
                    
                    _x, _, _z = model.gen_xz({}, z, n_width**2)
                    if dataset == 'mnist':
                        # invert grayscale so digits render dark-on-light
                        _z['x'] = 1 - _z['x']
                    image = paramgraphics.mat_to_img(f_dec(_z['x']), dim_input)
                    image.save(logdir+'2dmanifold'+tail, 'PNG')
                else:
                    _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144)
                    x_samples = _z_confab['x']
                    image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg)
                    image.save(logdir+'samples'+tail, 'PNG')
                    
                    #x_samples = _x['x']
                    #image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg)
                    #image.save(logdir+'samples2'+tail, 'PNG')
                    
            else:
                # Model with preprocessing
                
                if 'w0' in v:
                    image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg)
                    image.save(logdir+'q_w0'+tail, 'PNG')
                    
                image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg)
                image.save(logdir+'out_w'+tail, 'PNG')

                _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144)
                x_samples = f_dec(_z_confab['x'])
                x_samples = np.minimum(np.maximum(x_samples, 0), 1)
                image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg)
                image.save(logdir+'samples'+tail, 'PNG')
                
                
                
    # Optimize
    #SFO
    dostep = epoch_vae_adam(model, x, n_batch=n_batch, bernoulli_x=bernoulli_x, byteToFloat=byteToFloat)
    loop_va(dostep, hook)
    
    pass
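
A minimal numpy sketch (not the anglepy pp.PCA implementation) of the encoder/decoder pair these scripts thread through the data path: f_enc projects the column-major data onto the leading principal components before training, and f_dec maps model outputs back to pixel space for the image dumps in the hook.

import numpy as np

def pca_enc_dec(X, n_components):
    # X has shape (n_features, n_samples), matching the column-major layout above.
    mu = X.mean(axis=1, keepdims=True)
    U, s, _ = np.linalg.svd(X - mu, full_matrices=False)
    W = U[:, :n_components]            # top principal directions
    f_enc = lambda x: W.T.dot(x - mu)  # features -> components
    f_dec = lambda z: W.dot(z) + mu    # components -> features
    return f_enc, f_dec
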
Code Example #7
def main(n_z, n_hidden, dataset, seed, comment, gfx=True):

    # Initialize logdir
    #---------------------
    # Setasouto:
    # Create the directory to save the output files and log.
    #---------------------
    import time
    logdir = 'results/gpulearn_z_x_' + dataset + '_' + str(n_z) + '-' + str(
        n_hidden) + '_' + comment + '_' + str(int(time.time())) + '/'
    if not os.path.exists(logdir): os.makedirs(logdir)
    print('logdir:', logdir)
    print('gpulearn_z_x', n_z, n_hidden, dataset, seed)
    with open(logdir + 'hook.txt', 'a') as f:
        print('learn_z_x', n_z, n_hidden, dataset, seed, file=f)

    np.random.seed(seed)

    gfx_freq = 1

    weight_decay = 0
    f_enc, f_dec = lambda x: x, lambda x: x

    # Init data
    if dataset == 'mnist':
        import anglepy.data.mnist as mnist

        # MNIST
        size = 28
        train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(
            size)
        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': valid_x.astype(np.float32)}
        x_test = {'x': test_x.astype(np.float32)}
        L_valid = 1
        dim_input = (size, size)
        n_x = size * size
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        nonlinear = 'softplus'
        type_px = 'bernoulli'
        n_train = 50000
        n_batch = 1000
        colorImg = False
        bernoulli_x = True
        byteToFloat = False
        weight_decay = float(n_batch) / n_train

    elif dataset == 'mnist_binarized':
        import anglepy.data.mnist_binarized as mnist_binarized
        # MNIST
        train_x, valid_x, test_x = mnist_binarized.load_numpy(28)
        x = {'x': np.hstack((train_x, valid_x)).astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        dim_input = (28, 28)
        n_x = 28 * 28
        n_y = 10
        type_qz = 'gaussianmarg'
        type_pz = 'mog'
        nonlinear = 'rectlin'
        type_px = 'bernoulli'
        n_train = 60000
        n_batch = 1000
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch) / n_train

    elif dataset == 'freyface':
        # Frey's face
        import anglepy.data.freyface as freyface
        n_train = 1600
        train_x = freyface.load_numpy()
        np.random.shuffle(train_x)
        x = {'x': train_x.T[:, 0:n_train]}
        x_valid = {'x': train_x.T[:, n_train:]}
        L_valid = 1
        dim_input = (28, 20)
        n_x = 20 * 28
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'bounded01'
        nonlinear = 'tanh'  #tanh works better with freyface #'softplus'
        n_batch = 100
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch) / n_train

    elif dataset == 'freyface_pca':
        # Frey's face
        import anglepy.data.freyface as freyface
        n_train = 1600
        train_x = freyface.load_numpy().T
        np.random.shuffle(train_x.T)

        f_enc, f_dec, _ = pp.PCA(train_x, 0.99)
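        # pp.PCA returns (encode, decode, params); a fractional cutoff like
        # 0.99 presumably keeps enough components to explain 99% of the
        # variance (the integer cutoffs used elsewhere are component counts).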
        train_x = f_enc(train_x)

        x = {'x': train_x[:, 0:n_train].astype(np.float32)}
        x_valid = {'x': train_x[:, n_train:].astype(np.float32)}
        L_valid = 1
        dim_input = (28, 20)
        n_x = train_x.shape[0]
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 100
        colorImg = False
        bernoulli_x = False
        byteToFloat = False

    elif dataset == 'freyface_bernoulli':
        # Frey's face
        import anglepy.data.freyface as freyface
        n_train = 1600
        train_x = freyface.load_numpy().T
        np.random.shuffle(train_x.T)

        x = {'x': train_x[:, 0:n_train].astype(np.float32)}
        x_valid = {'x': train_x[:, n_train:].astype(np.float32)}
        L_valid = 1
        dim_input = (28, 20)
        n_x = train_x.shape[0]
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'bernoulli'
        nonlinear = 'softplus'
        n_batch = 100
        colorImg = False
        bernoulli_x = False
        byteToFloat = False

    elif dataset == 'norb':
        # small NORB dataset
        import anglepy.data.norb as norb
        size = 48
        train_x, train_y, test_x, test_y = norb.load_resized(size,
                                                             binarize_y=True)

        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        n_x = train_x.shape[0]
        dim_input = (size, size)
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 900  #23400/900 = 27
        colorImg = False
        #binarize = False
        byteToFloat = False
        bernoulli_x = False
        weight_decay = float(n_batch) / train_x.shape[1]

    elif dataset == 'norb_pca':
        # small NORB dataset
        import anglepy.data.norb as norb
        size = 48
        train_x, train_y, test_x, test_y = norb.load_resized(size,
                                                             binarize_y=True)

        f_enc, f_dec, _ = pp.PCA(train_x, 0.999)
        #f_enc, f_dec, _ = pp.normalize_random(train_x)
        train_x = f_enc(train_x)
        test_x = f_enc(test_x)

        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        n_x = train_x.shape[0]
        dim_input = (size, size)
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 900  #23400/900 = 27
        colorImg = False
        #binarize = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch) / train_x.shape[1]

    elif dataset == 'norb_normalized':
        # small NORB dataset
        import anglepy.data.norb as norb
        size = 48
        train_x, train_y, test_x, test_y = norb.load_resized(size,
                                                             binarize_y=True)

        #f_enc, f_dec, _ = pp.PCA(train_x, 0.99)
        #f_enc, f_dec, _ = pp.normalize_random(train_x)
        f_enc, f_dec, _ = pp.normalize(train_x)
        train_x = f_enc(train_x)
        test_x = f_enc(test_x)

        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': test_x.astype(np.float32)}
        L_valid = 1
        n_x = train_x.shape[0]
        dim_input = (size, size)
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'
        n_batch = 900  #23400/900 = 27
        colorImg = False
        #binarize = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch) / train_x.shape[1]

    elif dataset == 'svhn':
        # SVHN dataset
        import anglepy.data.svhn as svhn
        size = 32
        train_x, train_y, test_x, test_y = svhn.load_numpy(
            False, binarize_y=True)  #norb.load_resized(size, binarize_y=True)
        extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True)
        x = {
            'x': np.hstack((train_x, extra_x)),
            'y': np.hstack((train_y, extra_y))
        }
        ndict.shuffleCols(x)

        print('Performing PCA, can take a few minutes... ')
        f_enc, f_dec, pca_params = pp.PCA(x['x'][:, :10000],
                                          cutoff=600,
                                          toFloat=True)
        ndict.savez(pca_params, logdir + 'pca_params')
        print('Done.')

        n_y = 10
        x = {'x': f_enc(x['x']).astype(np.float32)}
        x_valid = {'x': f_enc(test_x).astype(np.float32)}
        L_valid = 1
        n_x = x['x'].shape[0]
        dim_input = (size, size)
        n_batch = 5000
        colorImg = True
        bernoulli_x = False
        byteToFloat = False
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'

    elif dataset == 'hyper':
        # Hyperspectral images:

        # Import 1 file of the dataset
        # TODO: import more files: Edit hyperspectralData.py

        #I added the hyperspectralData file in the anglepy library
        from hyperspectralData import HyperspectralData

        train_x, train_y, valid_x, valid_y, test_x, test_y = HyperspectralData(
        ).load_numpy(100000)

        #Dim input: How it has to be written like an image. We said that is:
        dim_input = (67, 4)
        n_x = train_x.shape[0]  #Dimension of our data vector.

        x = {'x': train_x.astype(np.float32)}
        x_valid = {'x': valid_x.astype(np.float32)}
        x_test = {'x': test_x.astype(np.float32)}
        L_valid = 1
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        nonlinear = 'softplus'
        type_px = 'bernoulli'
        n_train = train_x.shape[1]
        n_batch = 1000
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        weight_decay = float(n_batch) / n_train
        #Write the hyperparameters used:
        with open(logdir + 'AA_hyperparameters.txt', 'w') as file:
            file.write("L_valid: " + str(L_valid) + '\n')
            file.write("type_qz: " + type_qz + '\n')
            file.write("type_pz: " + type_pz + '\n')
            file.write("Nonlinear: " + nonlinear + '\n')
            file.write("type_px: " + type_px + '\n')
            file.write("n_train: " + str(n_train) + '\n')
            file.write("n_batch: " + str(n_batch) + '\n')
            file.write("colorImg: " + str(colorImg) + '\n')
            file.write("bernoulli_x: " + str(bernoulli_x) + '\n')
            file.write("byteToFloat: " + str(byteToFloat) + '\n')
        # Write the headers for the csv file output:
        with open(logdir + 'AA_results.txt', 'w') as file:
            # Like a csv file:
            file.write("Step" + ',' + "TimeElapsed" + ',' +
                       "LowerboundMinibatch" + ',' + "LowerboundValid" + ',' +
                       "NumStepNotImproving" + '\n')

    # Construct model
    from anglepy.models import GPUVAE_Z_X
    updates = get_adam_optimizer(learning_rate=3e-4, weight_decay=weight_decay)
    model = GPUVAE_Z_X(updates,
                       n_x,
                       n_hidden,
                       n_z,
                       n_hidden[::-1],
                       nonlinear,
                       nonlinear,
                       type_px,
                       type_qz=type_qz,
                       type_pz=type_pz,
                       prior_sd=100,
                       init_sd=1e-3)
    #---------------
    # SetaSouto:
    # The [::-1] is to reverse the list.
    #---------------

    if False:
        #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412689061/'
        #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412676966/'
        #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412695481/'
        #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412695455/'
        #dir = '/Users/dpkingma/results/gpulearn_z_x_svhn_pca_300-(500, 500)__1413904756/'
        dir = '/home/ubuntu/results/gpulearn_z_x_mnist_50-[500, 500]__1414259423/'
        w = ndict.loadz(dir + 'w_best.ndict.tar.gz')
        v = ndict.loadz(dir + 'v_best.ndict.tar.gz')
        ndict.set_value(model.w, w)
        ndict.set_value(model.v, v)

    # Some statistics for optimization
    ll_valid_stats = [-1e99, 0]

    # Progress hook
    def hook(epoch, t, ll):
        '''
        Documented by SetaSouto; may contain errors.

        :epoch: Number of the current step.
        :t: Time elapsed from the beginning.
        :ll: Loglikelihood (?).
        '''

        if epoch % 10 != 0: return

        ll_valid, _ = model.est_loglik(x_valid,
                                       n_samples=L_valid,
                                       n_batch=n_batch,
                                       byteToFloat=byteToFloat)

        # Log
        ndict.savez(ndict.get_value(model.v), logdir + 'v')
        ndict.savez(ndict.get_value(model.w), logdir + 'w')

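        # Checkpoint whenever the validation bound improves; otherwise count
        # stale checks so training can stop early.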
        if ll_valid > ll_valid_stats[0]:
            ll_valid_stats[0] = ll_valid
            ll_valid_stats[1] = 0
            ndict.savez(ndict.get_value(model.v), logdir + 'v_best')
            ndict.savez(ndict.get_value(model.w), logdir + 'w_best')
        else:
            ll_valid_stats[1] += 1
            # Stop when the validation bound has not improved for 100
            # consecutive checks (the hook runs every 10 epochs)
            if ll_valid_stats[1] > 100:
                print("Finished")
                with open(logdir + 'hook.txt', 'a') as f:
                    print(f, "Finished")
                exit()

        # This will be showing the current results and write them in a file:
        with open(logdir + 'AA_results.txt', 'a') as file:
            # Like a csv file:
            file.write(
                str(epoch) + ',' + str(t) + ',' + str(ll) + ',' +
                str(ll_valid) + ',' + str(ll_valid_stats[1]) + '\n')
        print("-------------------------")
        print("Current results:")
        print(" ")
        print("Step:", epoch)
        print("Time elapsed:", t)
        print("Loglikelihood minibatch:", ll)
        print("Loglikelihood validSet:", ll_valid)
        print("N not improving:", ll_valid_stats[1])
        #print(epoch, t, ll, ll_valid, ll_valid_stats)

        #This print the file where are written the stats.
        #with open(logdir+'hook.txt', 'a') as f:
        #print(f, epoch, t, ll, ll_valid, ll_valid_stats)

        # Graphics
        if gfx and epoch % gfx_freq == 0:

            #tail = '.png'
            tail = '-' + str(epoch) + '.png'

            v = {i: model.v[i].get_value() for i in model.v}
            w = {i: model.w[i].get_value() for i in model.w}

            if 'pca' not in dataset and 'random' not in dataset and 'normalized' not in dataset:

                if 'w0' in v:
                    image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T),
                                                     dim_input,
                                                     True,
                                                     colorImg=colorImg)
                    image.save(logdir + 'q_w0' + tail, 'PNG')

                image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]),
                                                 dim_input,
                                                 True,
                                                 colorImg=colorImg)
                image.save(logdir + 'out_w' + tail, 'PNG')

                if 'out_unif' in w:
                    image = paramgraphics.mat_to_img(f_dec(
                        w['out_unif'].reshape((-1, 1))),
                                                     dim_input,
                                                     True,
                                                     colorImg=colorImg)
                    image.save(logdir + 'out_unif' + tail, 'PNG')

                if n_z == 2:
                    n_width = 10
                    import scipy.stats
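                    # Lay out an n_width x n_width grid of evenly spaced
                    # quantiles and push it through the standard-normal
                    # inverse CDF, so the grid covers the 2D latent prior.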
                    z = {'z': np.zeros((2, n_width**2))}
                    for i in range(0, n_width):
                        for j in range(0, n_width):
                            z['z'][0, n_width * i + j] = scipy.stats.norm.ppf(
                                float(i) / n_width + 0.5 / n_width)
                            z['z'][1, n_width * i + j] = scipy.stats.norm.ppf(
                                float(j) / n_width + 0.5 / n_width)

                    _x_mf, _, _z = model.gen_xz({}, z, n_width**2)
                    x_mf = _z['x']
                    if dataset == 'mnist':
                        # invert grayscale so digits render dark-on-light
                        x_mf = 1 - x_mf
                    image = paramgraphics.mat_to_img(f_dec(x_mf), dim_input)
                    image.save(logdir + '2dmanifold' + tail, 'PNG')
                else:
                    _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144)
                    x_samples = _z_confab['x']
                    image = paramgraphics.mat_to_img(f_dec(x_samples),
                                                     dim_input,
                                                     colorImg=colorImg)
                    image.save(logdir + 'samples' + tail, 'PNG')

                    #x_samples = _x['x']
                    #image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg)
                    #image.save(logdir+'samples2'+tail, 'PNG')

            else:
                # Model with preprocessing

                if 'w0' in v:
                    image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T),
                                                     dim_input,
                                                     True,
                                                     colorImg=colorImg)
                    image.save(logdir + 'q_w0' + tail, 'PNG')

                image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]),
                                                 dim_input,
                                                 True,
                                                 colorImg=colorImg)
                image.save(logdir + 'out_w' + tail, 'PNG')

                _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144)
                x_samples = f_dec(_z_confab['x'])
                x_samples = np.minimum(np.maximum(x_samples, 0), 1)
                image = paramgraphics.mat_to_img(x_samples,
                                                 dim_input,
                                                 colorImg=colorImg)
                image.save(logdir + 'samples' + tail, 'PNG')

    # Optimize
    #SFO
    dostep = epoch_vae_adam(model,
                            x,
                            n_batch=n_batch,
                            bernoulli_x=bernoulli_x,
                            byteToFloat=byteToFloat)
    loop_va(dostep, hook)

    pass
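
Both this example and the previous ones hand control to loop_va(dostep, hook), where dostep comes from epoch_vae_adam and performs one pass of Adam updates over the minibatches. A minimal sketch of the driver loop these scripts assume (the real anglepy loop_va may differ):

import time

def loop_va(dostep, hook, n_epochs=10**7):
    # dostep(): one pass over the data; returns the epoch's lower bound.
    # hook(epoch, t, ll): logging / early-stopping callback; in these
    # examples it calls exit() itself when validation stops improving.
    t0 = time.time()
    for epoch in range(1, n_epochs + 1):
        ll = dostep()
        hook(epoch, time.time() - t0, ll)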
Code example #8
File: gpulearn_mm_z_x.py  Project: codeaudit/mmdgm
def main(n_z, n_hidden, dataset, seed, comment, alpha, decay1, decay2, gfx=True):
  
  # Initialize logdir
  pre_dir = 'models/gpulearn_z_x_mnist_96-(500, 500)'
  
  import time
  if os.environ.has_key('super_to_mean') and bool(int(os.environ['super_to_mean'])) == True:
    comment+='_super-to-mean'
  if os.environ.has_key('pretrain') and bool(int(os.environ['pretrain'])) == True:
    comment+='_pre-train'
  if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True:
    comment+='_prior'
    pre_dir+='_prior'
  if os.environ.has_key('cutoff'):
    comment+=('_'+str(int(os.environ['cutoff'])))
  if os.environ.has_key('train_residual') and bool(int(os.environ['train_residual'])) == True:
    comment+='_train-residual'
    pre_dir+='_train-residual'
  if os.environ.has_key('sigma_square'):
    comment+=('_'+str(float(os.environ['sigma_square'])))
    pre_dir+=('_'+str(float(os.environ['sigma_square'])))
  pre_dir+='/'
    
  logdir = 'results/gpulearn_mm_z_x_'+dataset+'_'+str(n_z)+'-'+'_'.join(toStr(n_hidden))+comment+'_'+str(int(time.time()))+'/'
  if not os.path.exists(logdir): os.makedirs(logdir)
  print 'logdir:', logdir
  print 'gpulearn_mm_z_x'
  color.printBlue('dataset = ' + str(dataset) + ' , n_z = ' + str(n_z) + ' , n_hidden = ' + str(n_hidden))
  with open(logdir+'hook.txt', 'a') as f:
    print >>f, 'learn_z_x', n_z, n_hidden, dataset, seed
  
  np.random.seed(seed)

  gfx_freq = 1
  
  weight_decay = 0
  
  # Init data
  if dataset == 'mnist':
    import anglepy.data.mnist as mnist
    
    # MNIST
    size = 28
    train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(size)
    f_enc, f_dec = pp.Identity()
    
    if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True:
        color.printBlue('Loading prior')
        mnist_prior = sio.loadmat('data/mnist_prior/mnist_prior.mat')
        train_mean_prior = mnist_prior['z_train']
        test_mean_prior = mnist_prior['z_test']
        valid_mean_prior = mnist_prior['z_valid']
    else:
        train_mean_prior = np.zeros((n_z,train_x.shape[1]))
        test_mean_prior = np.zeros((n_z,test_x.shape[1]))
        valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
        
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    
    
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 50000
    n_test = 10000
    n_valid = 10000
    n_batch = 1000
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
    
  elif dataset == 'higgs':
    size = 28
    f_enc, f_dec = pp.Identity()
    
    inputfile = 'data/higgs/HIGGS.csv'
    print 'loading file.'
    x = np.loadtxt(inputfile, dtype='f4', delimiter=',')
    print 'done.'
    y = x[:,0].reshape((-1,1))
    x = x[:,1:]
    x = np.array(x, dtype='float32')
    y = np.array(y, dtype='float32')
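    # HIGGS.csv layout: column 0 is the binary label, columns 1-21 the
    # low-level kinematic features, and columns 22-28 the 7 derived
    # high-level features, hence the slices below.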
    n_train = 10000000 
    n_valid = 500000
    n_test  = 500000
    n_batch = 1000
    derived_feat = 'all'
    if os.environ.has_key('derived_feat'):
        derived_feat = os.environ['derived_feat']
        color.printBlue(derived_feat)
        
    if derived_feat == 'high':
        # Only the 7 high level features.
        x = x[:, 21:28]
    elif derived_feat == 'low':
        # Only the 21 raw features.
        x = x[:, 0:21]
    else:
        pass
    
    train_x = x[0:n_train, :].T
    y_train = y[0:n_train, :].astype(np.int32)
    valid_x = x[n_train:n_train+n_valid, :].T
    y_valid = y[n_train:n_train+n_valid, :].astype(np.int32)
    test_x = x[n_train+n_valid:n_train+n_valid+n_test, :].T
    y_test = y[n_train+n_valid:n_train+n_valid+n_test, :].astype(np.int32)
    n_y = 2
    n_x = train_x.shape[0]
    
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))

    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(y_train).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(y_valid).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(y_test).astype(np.float32)}
    
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    
    nonlinear = 'tanh'
    if os.environ.has_key('nonlinear'):
        nonlinear = os.environ['nonlinear']
        color.printBlue(nonlinear)
    
    L_valid = 1
    dim_input = (1,size)
    type_px = 'gaussian'
    colorImg = False
    bernoulli_x = False
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'cifar10':
    import anglepy.data.cifar10 as cifar10
    size = 32
    train_x, train_y, test_x, test_y = cifar10.load_numpy()
    train_x = train_x.astype(np.float32).T
    test_x = test_x.astype(np.float32).T
    
    ## 
    f_enc, f_dec = pp.Identity()
    
    if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True:
        color.printBlue('Loading prior')
        cifar_prior = sio.loadmat('data/cifar10_prior/cifar10_prior.mat')
        train_mean_prior = cifar_prior['z_train']
        test_mean_prior = cifar_prior['z_test']
        #valid_mean_prior = cifar_prior['z_valid']
    else:
        train_mean_prior = np.zeros((n_z,train_x.shape[1]))
        test_mean_prior = np.zeros((n_z,test_x.shape[1]))
        
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    x_valid = x_test
    
    L_valid = 1
    dim_input = (size,size)
    n_y = 10
    n_x = x['x'].shape[0]
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'gaussian'
    if os.environ.has_key('type_px'):
        type_px = os.environ['type_px']
        color.printBlue('Generative type: '+type_px)
    n_train = 50000
    n_test = 10000
    n_batch = 5000
    colorImg = True
    bernoulli_x = False
    byteToFloat = False
    #weight_decay = float(n_batch)/n_train
    
  elif dataset == 'cifar10_zca':
    import anglepy.data.cifar10 as cifar10
    size = 32
    train_x, train_y, test_x, test_y = cifar10.load_numpy()
    
    train_x = train_x.astype(np.float32).T
    test_x = test_x.astype(np.float32).T
    
    ## 
    f_enc, f_dec = pp.Identity()
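    # ZCA whitening: decorrelate pixels using training-set statistics while
    # staying in pixel space; zca_winv (unused here) would map whitened data
    # back to the original pixels.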
    zca_mean, zca_w, zca_winv = cifar10.zca(train_x)
    train_x = zca_w.dot(train_x-zca_mean)
    test_x = zca_w.dot(test_x-zca_mean)
    
    if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True:
        color.printBlue('Loading prior')
        cifar_prior = sio.loadmat('data/cifar10_prior/cifar10_prior.mat')
        train_mean_prior = cifar_prior['z_train']
        test_mean_prior = cifar_prior['z_test']
        #valid_mean_prior = cifar_prior['z_valid']
    else:
        train_mean_prior = np.zeros((n_z,train_x.shape[1]))
        test_mean_prior = np.zeros((n_z,test_x.shape[1]))
        
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    x_valid = x_test
    
    L_valid = 1
    dim_input = (size,size)
    n_y = 10
    n_x = x['x'].shape[0]
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'gaussian'
    if os.environ.has_key('type_px'):
        type_px = os.environ['type_px']
        color.printBlue('Generative type: '+type_px)
    n_train = 50000
    n_test = 10000
    n_batch = 5000
    colorImg = True
    bernoulli_x = False
    byteToFloat = False
    #weight_decay = float(n_batch)/n_train
    
  elif dataset == 'mnist_basic': 
    # MNIST
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_'
    tmp = sio.loadmat(data_dir+'train.mat')
    #color.printRed(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    f_enc, f_dec = pp.Identity()
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 10000
    n_valid = 2000
    n_test = 50000
    n_batch = 200
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
  
  elif dataset == 'rectangle': 
    # MNIST
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'rectangles_'
    tmp = sio.loadmat(data_dir+'train.mat')
    color.printRed(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,1000:]
    valid_y = train_y[1000:]
    train_x = train_x[:,:1000]
    train_y = train_y[:1000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    f_enc, f_dec = pp.Identity()
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 2
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 1000
    n_valid = 200
    n_test = 50000
    n_batch = 500
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'convex': 
    # MNIST
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'convex_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,6000:]
    valid_y = train_y[6000:]
    train_x = train_x[:,:6000]
    train_y = train_y[:6000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    f_enc, f_dec = pp.Identity()
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 2
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 6000
    n_valid = 2000
    n_test = 50000
    n_batch = 120
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'rectangle_image': 
    # MNIST
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'rectangles_im_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    f_enc, f_dec = pp.Identity()
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 2
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 10000
    n_valid = 2000
    n_test = 50000
    n_batch = 200
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'mnist_rot': 
    # MNIST
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_all_rotation_normalized_float_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    f_enc, f_dec = pp.Identity()
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 10000
    n_valid = 2000
    n_test = 50000
    n_batch = 200
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'mnist_back_rand': 
    # MNIST
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_background_random_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    f_enc, f_dec = pp.Identity()
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 10000
    n_valid = 2000
    n_test = 50000
    n_batch = 200
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'mnist_back_image': 
    # MNIST
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_background_images_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    f_enc, f_dec = pp.Identity()
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 10000
    n_valid = 2000
    n_test = 50000
    n_batch = 200
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
    
  elif dataset == 'mnist_back_image_rot': 
    # MNIST
    size = 28
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_all_background_images_rotation_normalized_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['x_test'].T
    test_y = tmp['t_test'].T.astype(np.int32)
    
    print train_x.shape
    print train_y.shape
    print test_x.shape
    print test_y.shape
    
    f_enc, f_dec = pp.Identity()
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    '''
    x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    '''
    x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)}
    x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    L_valid = 1
    dim_input = (size,size)
    n_x = size*size
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 10000
    n_valid = 2000
    n_test = 50000
    n_batch = 200
    colorImg = False
    bernoulli_x = True
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
  
  elif dataset == 'mnist_binarized_own':
    #import anglepy.data.mnist_binarized as mnist_binarized
    # MNIST
    import anglepy.data.mnist as mnist
    
    size = 28
    
    data_dir = 'data/mnist_binarized_own/'+'binarized_mnist_'
    tmp = sio.loadmat(data_dir+'train.mat')
    train_x = tmp['train_x'].T
    train_y = tmp['train_y'][0,:]
    #train_y = tmp['t_train'].T.astype(np.int32)
    tmp = sio.loadmat(data_dir+'test.mat')
    test_x = tmp['test_x'].T
    test_y = tmp['test_y'][0,:]
    tmp = sio.loadmat(data_dir+'valid.mat')
    #print tmp.keys()
    valid_x = tmp['valid_x'].T
    valid_y = tmp['valid_y'][0,:]
    #test_y = tmp['t_test'].T.astype(np.int32)
    
    f_enc, f_dec = pp.Identity()
    
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    valid_mean_prior = np.zeros((n_z,valid_x.shape[1]))
    
    train_x = np.hstack((train_x, valid_x)).astype(np.float32)
    train_mean_prior = np.hstack((train_mean_prior,valid_mean_prior)).astype(np.float32)
    train_y = np.hstack((train_y, valid_y))
    
    print train_mean_prior.shape
    print train_x.shape
    print train_y.shape
    
    x = {'x': train_x.astype(np.float32), 'mean_prior':train_mean_prior.astype(np.float32),'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_valid = {'x': test_x.astype(np.float32),'mean_prior':test_mean_prior.astype(np.float32),'y': labelToMat(test_y).astype(np.float32)}
    x_test = x_valid
    
    print x['y'].shape
    print x_valid['y'].shape
    
    L_valid = 1
    dim_input = (28,28)
    n_x = 28*28
    n_y = 10
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    n_train = 60000
    n_valid = 10000
    n_batch = 1000
    colorImg = False
    bernoulli_x = False
    byteToFloat = False
    weight_decay = float(n_batch)/n_train
  
  elif dataset == 'norb_48_24300_pca':
    size = 48
    
    train_x, train_y, test_x, test_y = np.load('data/norb/norb_48_24300.npy')
    
    _x = {'x': train_x, 'y': train_y}
    #ndict.shuffleCols(_x)
    #train_x = _x['x']
    #train_y = _x['y']
    
    # Do PCA
    print 'pca'
    f_enc, f_dec, pca_params = pp.PCA(_x['x'][:,:10000], cutoff=500, toFloat=False)
    ndict.savez(pca_params, logdir+'pca_params')
    print 'done'
    
    train_mean_prior = np.zeros((n_z,train_x.shape[1]))
    test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    
    x = {'x': f_enc(train_x).astype(np.float32), 'mean_prior' : train_mean_prior.astype(np.float32), 'y':labelToMat(train_y).astype(np.float32)}
    x_valid = {'x': f_enc(test_x).astype(np.float32), 'mean_prior' : test_mean_prior.astype(np.float32), 'y':labelToMat(test_y).astype(np.float32)}
    x_test = {'x': f_enc(test_x).astype(np.float32), 'mean_prior' : test_mean_prior.astype(np.float32), 'y':labelToMat(test_y).astype(np.float32)}
     
    x_train = x
    
    print x['x'].shape
    print x['mean_prior'].shape
    print x['y'].shape
    
    
    L_valid = 1
    n_y = 5
    n_train = 24700
    n_test = 24700
    n_valid = 24700
    n_x = x['x'].shape[0]
    dim_input = (size,size)
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    type_px = 'gaussian'
    nonlinear = 'softplus'
    n_batch = 900 #23400/900 = 27
    colorImg = False
    #binarize = False
    bernoulli_x = False
    byteToFloat = False
    weight_decay= float(n_batch)/train_x.shape[1]
    
  
  elif dataset == 'norb':  
    import anglepy.data.norb as norb
    size = 48
    train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True)
    _x = {'x': train_x, 'y': train_y}
    ndict.shuffleCols(_x)
    train_x = _x['x']
    train_y = _x['y']
    
    # Do PCA
    f_enc, f_dec, pca_params = pp.PCA(_x['x'][:,:10000], cutoff=1000, toFloat=False)
    ndict.savez(pca_params, logdir+'pca_params')
    
    x = {'x': f_enc(train_x).astype(np.float32), 'y':train_y.astype(np.float32)}
    x_valid = {'x': f_enc(test_x).astype(np.float32), 'y':test_y.astype(np.float32)}
    x_test = {'x': f_enc(test_x).astype(np.float32), 'y':test_y.astype(np.float32)}
    
    L_valid = 1
    n_x = x['x'].shape[0]
    n_y = 5
    dim_input = (size,size)
    n_batch = 1000 #23400/900 = 27
    colorImg = False
    bernoulli_x = False
    byteToFloat = False
    mosaic_w = 5
    mosaic_h = 1
    type_px = 'gaussian'
  
  elif dataset == 'norb_pca':  
    # small NORB dataset
    import anglepy.data.norb as norb
    size = 48
    train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True)

    f_enc, f_dec, _ = pp.PCA(train_x, 0.999)
    #f_enc, f_dec, _ = pp.normalize_random(train_x)
    train_x = f_enc(train_x)
    test_x = f_enc(test_x)
    
    x = {'x': train_x.astype(np.float32)}
    x_valid = {'x': test_x.astype(np.float32)}
    L_valid = 1
    n_x = train_x.shape[0]
    dim_input = (size,size)
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    type_px = 'gaussian'
    nonlinear = 'softplus'
    n_batch = 900 #23400/900 = 27
    colorImg = False
    #binarize = False
    bernoulli_x = False
    byteToFloat = False
    weight_decay= float(n_batch)/train_x.shape[1]

  elif dataset == 'svhn':
    # SVHN dataset
    #import anglepy.data.svhn as svhn
    
    size = 32
    train_x, train_y, test_x, test_y = np.load('data/svhn/svhn.npy')
    #extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True)
    #x = {'x': np.hstack((train_x, extra_x)), 'y':np.hstack((train_y, extra_y))}
    #ndict.shuffleCols(x)
    x = {'x' : train_x, 'y': train_y}
    
    print 'Performing PCA, can take a few minutes... '
    cutoff = 300
    if os.environ.has_key('cutoff'):
        cutoff = int(os.environ['cutoff'])
        color.printBlue('cutoff: '+str(cutoff))
        
    f_enc, f_dec, pca_params = pp.PCA(x['x'][:,:10000], cutoff=cutoff, toFloat=True)
    ndict.savez(pca_params, logdir+'pca_params')
    print 'Done.'
    n_y = 10
    
    if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True:
        color.printBlue('Loading prior')
        train_mean_prior, train_y1, test_mean_prior, test_y1 = np.load('data/svhn/svhn_prior.npy')
        print np.sum((train_y1 == train_y).astype(np.int32))
        print np.sum((test_y1 == test_y).astype(np.int32))
        
    else:
        train_mean_prior = np.zeros((n_z,train_x.shape[1]))
        test_mean_prior = np.zeros((n_z,test_x.shape[1]))
    
    x = {'x': f_enc(x['x']).astype(np.float32), 'mean_prior':train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)}
    x_train = x
    x_test = {'x': f_enc(test_x).astype(np.float32), 'mean_prior':test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)}
    x_valid = x_test
    
    print x_train['x'].shape
    print x_test['x'].shape
    print train_y.shape
    print test_y.shape
    print x_train['mean_prior'].shape
    print x_test['mean_prior'].shape
    
    L_valid = 1
    n_x = x['x'].shape[0]
    dim_input = (size,size)
    n_batch = 5000
    n_train = 604388
    n_valid = 26032 
    n_test = 26032 
    colorImg = True
    bernoulli_x = False
    byteToFloat = False
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    type_px = 'gaussian'
    nonlinear = 'softplus' 
 
  else:
    print 'invalid data set'
    exit()
    
  # Construct model
  from anglepy.models import GPUVAE_MM_Z_X
  learning_rate1 = 3e-4
  if os.environ.has_key('stepsize'):
    learning_rate1 = float(os.environ['stepsize'])
    color.printBlue(str(learning_rate1))
  updates = get_adam_optimizer(learning_rate=learning_rate1,decay1=decay1, decay2=decay2, weight_decay=weight_decay)
  model = GPUVAE_MM_Z_X(updates, n_x, n_y, n_hidden, n_z, n_hidden[::-1], nonlinear, nonlinear, type_px, type_qz=type_qz, type_pz=type_pz, prior_sd=100, init_sd=1e-3)
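  # The decoder mirrors the encoder: n_hidden[::-1] reverses the hidden layer
  # sizes for the generative network.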
  
  if os.environ.has_key('pretrain') and bool(int(os.environ['pretrain'])) == True:
    #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412689061/'
    #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412676966/'
    #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412695481/'
    #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412695455/'
    #dir = '/Users/dpkingma/results/gpulearn_z_x_svhn_pca_300-(500, 500)__1413904756/'
    if len(n_hidden) == 1:
        color.printBlue('pre-training-1-layer')
        layer_str = '-500'
    elif len(n_hidden) == 2:
        color.printBlue('pre-training-2-layers')
        layer_str = '-(500, 500)'
    else:
        raise Exception()
        
    pre_str = 'models/gpulearn_z_x_'
    if dataset == 'mnist':
        if os.environ.has_key('predir'):
            pdr = int(os.environ['predir'])
            color.printRed('predir')
            if pdr == 1:
                dir = 'results/seed0_20000/'
            elif pdr == 2:
                dir = 'results/seedb_20000/'
            else:
                raise Exception('965 in mmva.')
        else:
            dir = 'models/mnist_z_x_50-500-500_longrun/'
      

    elif dataset == 'mnist_rot':
      dir = pre_str + 'mnist_rot_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'mnist_back_rand':
      dir = pre_str + 'mnist_back_rand_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'mnist_back_image':
      dir = pre_str + 'mnist_back_image_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'mnist_back_image_rot':
      dir = pre_str + 'mnist_back_image_rot_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'rectangle':
      dir = pre_str + 'rectangle_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'rectangle_image':
      dir = pre_str + 'rectangle_image_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'convex':
      dir = pre_str + 'convex_'+str(n_z)+layer_str+'_longrun/'
    elif dataset == 'mnist_basic':
      dir = pre_str + 'mnist_basic_'+str(n_z)+layer_str+'_longrun/'

    
    if dataset == 'svhn':
        if (os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True):
            print 'prior-------------------'
            pre_dir = 'results/gpulearn_z_x_svhn_'+str(n_z)+'-500-500_prior_'+str(cutoff)+'_longrun/'
        else:
            pre_dir = 'results/gpulearn_z_x_svhn_'+str(n_z)+'-500-500_'+str(cutoff)+'_longrun/'
          
        color.printBlue(pre_dir)    
        w = ndict.loadz(pre_dir+'w_best.ndict.tar.gz')
        v = ndict.loadz(pre_dir+'v_best.ndict.tar.gz')
        
    elif n_z == 50:
        print 'n_z = 50', dir
        w = ndict.loadz(dir+'w_best.ndict.tar.gz')
        v = ndict.loadz(dir+'v_best.ndict.tar.gz')
    else:
        w = ndict.loadz(pre_dir+'w_best.ndict.tar.gz')
        v = ndict.loadz(pre_dir+'v_best.ndict.tar.gz')
    ndict.set_value2(model.w, w)
    ndict.set_value2(model.v, v)
  
  # Some statistics for optimization
  ll_valid_stats = [-1e99, 0, 0]
  predy_valid_stats = [1, 0, 0, 0]
  predy_test_stats = [0, 1, 0]
  
  # Progress hook
  def hook(epoch, t, ll):
    
    if epoch==-1:
        ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat)
        ll_test, _ = model.est_loglik(x_test, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat)

        #print 'Likelihood of the pre-trained model: ', ll_valid, ll_test

    if epoch%10 != 0: return
    
    
    ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat)
    ll_test = ll_valid
    #if not dataset == 'mnist_binarized':
    if not dataset == 'svhn':
        ll_test, _ = model.est_loglik(x_test, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat)
    
    # Log
    ndict.savez(ndict.get_value(model.v), logdir+'v')
    ndict.savez(ndict.get_value(model.w), logdir+'w')
    
    
    if ll_valid > ll_valid_stats[0]:
      ll_valid_stats[0] = ll_valid
      ll_valid_stats[1] = 0
      ll_valid_stats[2] = epoch
      ndict.savez(ndict.get_value(model.v), logdir+'v_best')
      ndict.savez(ndict.get_value(model.w), logdir+'w_best')
    else:
      ll_valid_stats[1] += 1
      # Stop when the validation bound has not improved for 1000 consecutive
      # checks (the hook runs every 10 epochs)
      if ll_valid_stats[1] > 1000:
        print "Finished"
        with open(logdir+'hook.txt', 'a') as f:
          print >>f, "Finished"
        exit()
    
    # Graphics
    if gfx and epoch%gfx_freq == 0:
      
      #tail = '.png'
      tail = '-'+str(epoch)+'.png'
      
      v = {i: model.v[i].get_value() for i in model.v}
      w = {i: model.w[i].get_value() for i in model.w}
      
      if True:
        def infer(data, n_batch=1000):
            size = data['x'].shape[1]
            #res = np.zeros((sum(n_hidden), size))
            #res3 = np.zeros((n_z,size))
            #res1 = np.zeros((n_z,size))
            predy = []
            for i in range(0, size, n_batch):
              idx_to = min(size, i+n_batch)
              x_batch = ndict.getCols(data, i, idx_to)
              
              # may have bugs
              nn_batch = idx_to - i
              
              _x, _z, _z_confab = model.gen_xz(x_batch, {}, nn_batch)
              x_samples = _z_confab['x']
              predy += list(_z_confab['predy'])        
              #for (hi, hidden) in enumerate(_z_confab['hidden']):
              #  res[sum(n_hidden[:hi]):sum(n_hidden[:hi+1]),i:i+nn_batch] = hidden
              #res3[:,i:i+nn_batch] = _z_confab['logvar']
              #res1[:,i:i+nn_batch] = _z_confab['mean']
              
            stats = dict()
            #if epoch == -1:
            #  print 'features: ', res.shape
            return predy #(res, predy, _z, res1, res3)
        
        def evaluate(data, predy):
            y = np.argmax(data['y'], axis=0)
            return sum([int(yi != py) for (yi, py) in zip(y, predy)]) / float(len(predy))
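        # evaluate() computes the misclassification rate of the predicted
        # labels against the one-hot 'y' matrix.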
        #if not dataset == 'mnist_binarized':
        #(z_test, pred_test,_z_test,z_test1,vv_test) = infer(x_test)
        pred_test = infer(x_test)
        pred_train = infer(x_train)
        if not dataset == 'svhn':
            n_used = 1000
            if n_valid < 2 * n_used:
                n_used = n_valid / 2
            pred_valid = infer(x_valid,n_used)
            
        #predy_valid = predy_test
        #(z_train, pred_train, _z_train,z_train1,vv_train) = infer(x_train)
          
        #l_t, px_t, pz_t, qz_t = model.test(x_train, n_samples=1, n_batch=n_batch, byteToFloat=byteToFloat)
          
        '''
          print 'Elogpx', px_t, 'Elogpz', pz_t, '-Elogqz', qz_t
          #sigma_square = float(os.environ['sigma_square'])
          print 'var', np.mean(np.exp(vv_train)), 'q', np.mean(np.abs(z_train1)), 'p', np.mean(np.abs(train_mean_prior)), 'd', np.mean(np.abs(z_train1-train_mean_prior))
        '''
        #with open(logdir+'hook.txt', 'a') as f:
        #    print >>f, 'Elogpx', px_t, 'Elogpz', pz_t, '-Elogqz', qz_t
        #    print >>f, 'var', np.mean(np.exp(vv_train)), 'q', np.mean(np.abs(z_train1)), 'p', np.mean(np.abs(train_mean_prior)), 'd', np.mean(np.abs(z_train1-train_mean_prior))
          
        #
        pre_train = evaluate(x_train, pred_train)
        pre_test = evaluate(x_test, pred_test)
        pre_valid = pre_test
        if not dataset == 'svhn':
            pre_valid = evaluate(x_valid, pred_valid)
        
        if pre_valid < predy_valid_stats[0]:
            predy_valid_stats[0] = pre_valid
            predy_valid_stats[1] = pre_test
            predy_valid_stats[2] = epoch
            predy_valid_stats[3] = 0
          
            ndict.savez(ndict.get_value(model.v), logdir+'v_best_predy')
            ndict.savez(ndict.get_value(model.w), logdir+'w_best_predy')
        else:
            predy_valid_stats[3] += 1
            # Stop when validation error has not improved for 10000 checks
            # (only once the classifier weight param_c is active)
            if predy_valid_stats[3] > 10000 and model.param_c.get_value() > 0:
              print "Finished"
              with open(logdir+'hook.txt', 'a') as f:
                print >>f, "Finished"
              exit()
        if pre_test < predy_test_stats[1]:
            predy_test_stats[0] = pre_valid
            predy_test_stats[1] = pre_test
            predy_test_stats[2] = epoch
          
          
        print 'epoch', epoch, 't', t, 'll', ll, 'll_valid', ll_valid, 'll_test', ll_test
        print 'train_err = ', pre_train, 'valid_err = ', pre_valid, 'test_err = ', pre_test
        print '--best: predy_valid_stats', predy_valid_stats
        with open(logdir+'hook.txt', 'a') as f:
            print >>f, 'epoch', epoch, 't', t, 'll', ll, 'll_valid', ll_valid, 'll_test', ll_test, ll_valid_stats
            print >>f, 'train_err = ', pre_train, 'valid_err = ', pre_valid, 'test_err = ', pre_test
            print >>f, '--best: predy_valid_stats', predy_valid_stats

      
      if 'pca' not in dataset and 'random' not in dataset and 'normalized' not in dataset:
        
        if 'w0' in v:
          image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg)
          image.save(logdir+'q_w0'+tail, 'PNG')
        
        image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg)
        image.save(logdir+'out_w'+tail, 'PNG')
        
        if 'out_unif' in w:
          image = paramgraphics.mat_to_img(f_dec(w['out_unif'].reshape((-1,1))), dim_input, True, colorImg=colorImg)
          image.save(logdir+'out_unif'+tail, 'PNG')
        
        if n_z == 2:
          n_width = 10
          import scipy.stats
          z = {'z':np.zeros((2,n_width**2))}
          for i in range(0,n_width):
            for j in range(0,n_width):
              z['z'][0,n_width*i+j] = scipy.stats.norm.ppf(float(i)/n_width+0.5/n_width)
              z['z'][1,n_width*i+j] = scipy.stats.norm.ppf(float(j)/n_width+0.5/n_width)
          
          x, _, _z = model.gen_xz({}, z, n_width**2)
          if dataset == 'mnist':
            # invert grayscale so the digits render dark-on-light
            _z['x'] = 1 - _z['x']
          image = paramgraphics.mat_to_img(f_dec(_z['x']), dim_input)
          image.save(logdir+'2dmanifold'+tail, 'PNG')
        else: 
          if 'norb' in dataset or dataset == 'svhn':
            nn_batch_nn = 64
          else:
            nn_batch_nn = 144
          
          # Sample around per-example prior means when the 'prior' env flag is
          # set and residual training is off.
          if not (os.environ.has_key('train_residual') and bool(int(os.environ['train_residual']))) and (os.environ.has_key('prior') and bool(int(os.environ['prior']))):

            mp_in = np.random.randint(0,x_train['mean_prior'].shape[1],nn_batch_nn)
            m_p = x_train['mean_prior'][:,mp_in]
            s_s = 1
            if os.environ.has_key('sigma_square'):
                s_s = float(os.environ['sigma_square'])
            x_samples = model.gen_xz_prior({}, {}, m_p, s_s, n_batch=nn_batch_nn)
            x_samples = x_samples['x']
            m_p1 = (np.ones((n_z, nn_batch_nn)).T * np.mean(x_train['mean_prior'], axis=1)).T
            x_samples1 = model.gen_xz_prior({}, {}, m_p1.astype(np.float32), s_s, n_batch=nn_batch_nn)
            image = paramgraphics.mat_to_img(f_dec(x_samples1['x']), dim_input, colorImg=colorImg)
            image.save(logdir+'mean_samples-prior'+tail, 'PNG')
            x_samples11 = model.gen_xz_prior11({}, {}, m_p, s_s, n_batch=nn_batch_nn)
            image = paramgraphics.mat_to_img(f_dec(x_samples11['x']), dim_input, colorImg=colorImg)
            image.save(logdir+'prior-image'+tail, 'PNG')
            
          else:
            _x, _, _z_confab = model.gen_xz({}, {}, n_batch=nn_batch_nn)
            x_samples = _z_confab['x']
            
          image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg)
          image.save(logdir+'samples-prior'+tail, 'PNG')
          
      else:
        # Model with preprocessing (datasets tagged 'pca', 'random' or 'normalized')
        if 'w0' in v:
          tmp = f_dec(v['w0'][:].T)
          if 'zca' in dataset:
            tmp = zca_dec(zca_mean, zca_winv, tmp)
          image = paramgraphics.mat_to_img(tmp, dim_input, True, colorImg=colorImg)
          image.save(logdir+'q_w0'+tail, 'PNG')
        
        tmp = f_dec(w['out_w'][:])
        if 'zca' in dataset:
          tmp = zca_dec(zca_mean, zca_winv, tmp)
            
        image = paramgraphics.mat_to_img(tmp, dim_input, True, colorImg=colorImg)
        image.save(logdir+'out_w'+tail, 'PNG')
        if dataset == 'svhn':
            nn_batch_nn = 64
        else:
            nn_batch_nn = 144
            
        # Same prior-sampling branch as above, for the preprocessed datasets
        if not (os.environ.has_key('train_residual') and bool(int(os.environ['train_residual']))) and (os.environ.has_key('prior') and bool(int(os.environ['prior']))):

          mp_in = np.random.randint(0,x_train['mean_prior'].shape[1],nn_batch_nn)
          m_p = x_train['mean_prior'][:,mp_in]
          s_s = 1
          if os.environ.has_key('sigma_square'):
              s_s = float(os.environ['sigma_square'])
          x_samples = model.gen_xz_prior({}, {}, m_p, s_s, n_batch=nn_batch_nn)
          x_samples = zca_dec(zca_mean, zca_winv,x_samples['x'])
          x_samples = np.minimum(np.maximum(x_samples, 0), 1)  
          x_samples11 = model.gen_xz_prior11({}, {}, m_p, s_s, n_batch=nn_batch_nn)
          x_samples11 = zca_dec(zca_mean,zca_winv,x_samples11['x'])
          x_samples11 = np.minimum(np.maximum(x_samples11, 0), 1)    
          image = paramgraphics.mat_to_img(x_samples11, dim_input, colorImg=colorImg)
          image.save(logdir+'prior-image'+tail, 'PNG')
        else:
          _x, _z, _z_confab = model.gen_xz({}, {}, n_batch=nn_batch_nn)
          x_samples = f_dec(_z_confab['x'])
          x_samples = np.minimum(np.maximum(x_samples, 0), 1)
        image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg)
        image.save(logdir+'samples'+tail, 'PNG')

  # Optimize
  dostep = epoch_vae_adam(model, x, n_batch=n_batch, bernoulli_x=bernoulli_x, byteToFloat=byteToFloat)
  loop_va(model, dostep, hook)
  
  pass
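
A note on the bookkeeping in the hook above: `predy_valid_stats` is a plain list used as mutable closure state, holding [best validation error, test error at that point, epoch of the best result, checks since last improvement]; training stops once that last counter exceeds the patience threshold. A minimal standalone sketch of the pattern (names here are illustrative, not from the source):

best = [float('inf'), float('inf'), -1, 0]  # [valid_err, test_err, epoch, patience counter]

def update_early_stopping(best, valid_err, test_err, epoch, patience=10000):
    # Returns True when training should stop.
    if valid_err < best[0]:
        best[:] = [valid_err, test_err, epoch, 0]  # improved: record and reset
        return False
    best[3] += 1
    return best[3] > patience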
Code example #9
File: wzoptim.py Project: Beronx86/anglepy
def optim_vae_sfo(model, x, v_init, w_init, n_batch, n_passes, hook, n_resample=20, resample_keepmem=False, bernoulli_x=False, display=0):
    
    # Shuffle columns of dataset x
    ndict.shuffleCols(x)
    
    # create minibatches
    n_tot = x.itervalues().next().shape[1]
    minibatches = []
    n_minibatches = n_tot / n_batch
    if (n_tot % n_batch) != 0: raise Exception('dataset size must be a multiple of n_batch')
    
    # Divide into minibatches
    def make_minibatch(i):
        _x = ndict.getCols(x, i * n_batch, (i+1) * n_batch)
        _eps = model.gen_eps(n_batch)
        if bernoulli_x: _x['x'] = np.random.binomial(n=1, p=_x['x'])
        return [i, _x, _eps]

    for i in range(n_minibatches):
        minibatches.append(make_minibatch(i))
      
    L = [0.]
    n_L = [0]
    
    def f_df(w, minibatch):
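        # SFO calls this with the current parameter dicts and one minibatch,
        # and expects (objective, gradients) back. Everything below is negated
        # and scaled by 1/n_batch, so SFO minimizes the average negative
        # variational lower bound per datapoint.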
        
        i_minibatch = minibatch[0]
        x_minibatch = minibatch[1]
        eps_minibatch = minibatch[2]
        
        # Get gradient
        logpx, logpz, logqz, gv, gw = model.dL_dw(w['v'], w['w'], x_minibatch, eps_minibatch)
        
        # Get gradient w.r.t. priors
        logpv, logpw, gv_prior, gw_prior = model.dlogpw_dw(w['v'], w['w'])
        gv = {i: gv[i] + float(n_batch)/n_tot * gv_prior[i] for i in gv}
        gw = {i: gw[i] + float(n_batch)/n_tot * gw_prior[i] for i in gw}
        
        f = (logpx.sum() + logpz.sum() - logqz.sum())
        L[0] += -f/(1.*n_batch)
        n_L[0] += 1
        f += float(n_batch)/n_tot * logpv
        f += float(n_batch)/n_tot * logpw
        
        for i in gv: gv[i] *= -1./n_batch
        for i in gw: gw[i] *= -1./n_batch
        f *= -1./n_batch
        
        #print 'norms gv:'
        #ndict.pNorm(gv)
        #print 'norms gw'
        #ndict.pNorm(gw)
        
        return f, {'v':gv,'w':gw}
    
    w_init = {'v':v_init, 'w':w_init}
    
    from sfo import SFO
    optimizer = SFO(f_df, w_init, minibatches, display=display)
    
    #optimizer.check_grad()
    
    # loop
    for i in range(n_passes):
        w = optimizer.optimize(num_passes=1)
        LB = L[0]/(1.*n_L[0])
        hook(i, w['v'], w['w'], LB)
        L[0] = 0
        n_L[0] = 0
        # Reset noise epsilon of some minibatches
        for j in range(n_minibatches):
            if n_resample > 0 and i%n_resample == j%n_resample:
                minibatches[j] = make_minibatch(j)
                optimizer.replace_subfunction(j, resample_keepmem, minibatches[j])
        
    print "Finished!"
Code example #10
def main(n_z, n_hidden, dataset, seed, gfx=True, _size=None):
    '''Learn a variational auto-encoder with generative model p(x,y,z)=p(y)p(z)p(x|y,z).
    x and y are (always) observed.
    I.e. this cannot be used for semi-supervised learning
    '''
    assert (type(n_hidden) == tuple or type(n_hidden) == list)
    assert type(n_z) == int
    assert isinstance(dataset, str)

    print('gpulearn_yz_x', n_z, n_hidden, dataset, seed)

    import time
    logdir = 'results/gpulearn_yz_x_' + dataset + '_' + str(n_z) + '-' + str(
        n_hidden) + '-' + str(int(time.time())) + '/'
    if not os.path.exists(logdir): os.makedirs(logdir)
    print('logdir:', logdir)

    np.random.seed(seed)

    # Init data
    if dataset == 'mnist':
        '''
        What works well:
        100-2-100 (Generated digits stay a bit shady)
        1000-2-1000 (Needs pretty long training)
        '''
        import anglepy.data.mnist as mnist

        # MNIST
        size = 28
        train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(
            size, binarize_y=True)
        f_enc, f_dec = lambda x: x, lambda x: x
        x = {
            'x': train_x[:, :].astype(np.float32),
            'y': train_y[:, :].astype(np.float32)
        }
        x_valid = {
            'x': valid_x.astype(np.float32),
            'y': valid_y.astype(np.float32)
        }
        L_valid = 1
        dim_input = (size, size)
        n_x = size * size
        n_y = 10
        n_batch = 1000
        colorImg = False
        bernoulli_x = True
        byteToFloat = False
        mosaic_w = 5
        mosaic_h = 2
        type_px = 'bernoulli'

    elif dataset == 'norb':
        # resized NORB dataset, reshuffled
        import anglepy.data.norb as norb
        size = _size  #48
        train_x, train_y, test_x, test_y = norb.load_resized(size,
                                                             binarize_y=True)
        _x = {'x': train_x, 'y': train_y}
        ndict.shuffleCols(_x)
        train_x = _x['x']
        train_y = _x['y']

        # Do PCA
        f_enc, f_dec, pca_params = pp.PCA(_x['x'][:, :10000],
                                          cutoff=2000,
                                          toFloat=False)
        ndict.savez(pca_params, logdir + 'pca_params')

        x = {
            'x': f_enc(train_x).astype(np.float32),
            'y': train_y.astype(np.float32)
        }
        x_valid = {
            'x': f_enc(test_x).astype(np.float32),
            'y': test_y.astype(np.float32)
        }

        L_valid = 1
        n_x = x['x'].shape[0]
        n_y = 5
        dim_input = (size, size)
        n_batch = 1000
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        mosaic_w = 5
        mosaic_h = 1
        type_px = 'gaussian'

    elif dataset == 'norb_instances':
        # resized NORB dataset with the instances as classes
        import anglepy.data.norb2 as norb2
        size = _size  #48
        x, y = norb2.load_numpy_subclasses(size, binarize_y=True)
        _x = {'x': x, 'y': y}
        ndict.shuffleCols(_x)

        # Do pre-processing
        if True:
            # Works
            f_enc, f_dec, pca_params = pp.PCA(_x['x'][:, :10000],
                                              cutoff=600,
                                              global_sd=True,
                                              toFloat=True)
            ndict.savez(pca_params, logdir + 'pca_params')
        elif False:
            # Doesn't work
            f_enc, f_dec, pp_params = pp.normalize_noise(_x['x'][:, :50000],
                                                         noise_sd=0.01,
                                                         global_sd=True,
                                                         toFloat=True)
        else:
            # Doesn't work
            f_enc, f_dec, params = pp.normalize_random(x=x[:, :10000],
                                                       global_sd=True,
                                                       toFloat=True)
            ndict.savez(params, logdir + 'normalize_random_params')

        n_valid = 5000
        x = {
            'x': f_enc(_x['x'][:, :-n_valid]).astype(np.float32),
            'y': _x['y'][:, :-n_valid].astype(np.float32)
        }
        # Hold out the last n_valid columns for validation
        x_valid = {
            'x': f_enc(_x['x'][:, -n_valid:]).astype(np.float32),
            'y': _x['y'][:, -n_valid:].astype(np.float32)
        }

        L_valid = 1
        n_x = x['x'].shape[0]
        n_y = 50
        dim_input = (size, size)
        n_batch = 5000
        colorImg = False
        bernoulli_x = False
        byteToFloat = False
        mosaic_w = 5
        mosaic_h = 1
        type_px = 'gaussian'

    elif dataset == 'svhn':
        # SVHN dataset
        import anglepy.data.svhn as svhn
        size = 32
        train_x, train_y, test_x, test_y = svhn.load_numpy(
            False, binarize_y=True)
        extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True)
        x = {
            'x': np.hstack((train_x, extra_x)),
            'y': np.hstack((train_y, extra_y))
        }
        ndict.shuffleCols(x)

        #f_enc, f_dec, (x_sd, x_mean) = pp.preprocess_normalize01(train_x, True)
        f_enc, f_dec, pca_params = pp.PCA(x['x'][:, :10000],
                                          cutoff=1000,
                                          toFloat=True)
        ndict.savez(pca_params, logdir + 'pca_params')

        n_y = 10
        x = {
            'x': f_enc(x['x']).astype(np.float32),
            'y': x['y'].astype(np.float32)
        }
        x_valid = {
            'x': f_enc(test_x).astype(np.float32),
            'y': test_y.astype(np.float32)
        }
        L_valid = 1
        n_x = x['x'].shape[0]
        dim_input = (size, size)
        n_batch = 5000
        colorImg = True
        bernoulli_x = False
        byteToFloat = False
        mosaic_w = 5
        mosaic_h = 2
        type_px = 'gaussian'

    # Init model
    n_hidden_q = n_hidden
    n_hidden_p = n_hidden
    from anglepy.models import GPUVAE_YZ_X
    updates = get_adam_optimizer(alpha=3e-4,
                                 beta1=0.9,
                                 beta2=0.999,
                                 weight_decay=0)
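    # Note: n_hidden is shared between the recognition and generative nets;
    # the generative net receives the reversed tuple (n_hidden_p[::-1]),
    # presumably so its layer sizes mirror the encoder's.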
    model = GPUVAE_YZ_X(updates,
                        n_x,
                        n_y,
                        n_hidden_q,
                        n_z,
                        n_hidden_p[::-1],
                        'softplus',
                        'softplus',
                        type_px=type_px,
                        type_qz='gaussianmarg',
                        type_pz='gaussianmarg',
                        prior_sd=1,
                        uniform_y=True)

    if False:
        dir = '/home/ubuntu/results/gpulearn_yz_x_svhn_300-(500, 500)-1414094291/'
        dir = '/home/ubuntu/results/gpulearn_yz_x_svhn_300-(500, 500)-1414163488/'
        w = ndict.loadz(dir + 'w_best.ndict.tar.gz')
        v = ndict.loadz(dir + 'v_best.ndict.tar.gz')
        ndict.set_value(model.w, w)
        ndict.set_value(model.v, v)

    # Some statistics for optimization
    ll_valid_stats = [-1e99, 0]

    # Fixed sample for visualisation
    z_sample = {
        'z':
        np.repeat(np.random.standard_normal(size=(n_z, 12)), 12,
                  axis=1).astype(np.float32)
    }
    y_sample = {
        'y':
        np.tile(
            np.random.multinomial(1, [1. / n_y] * n_y, size=12).T, (1, 12))
    }

    # Progress hook
    def hook(epoch, t, ll):

        if epoch % 10 != 0:
            return

        ll_valid, _ = model.est_loglik(x_valid,
                                       n_samples=L_valid,
                                       n_batch=n_batch,
                                       byteToFloat=byteToFloat)

        if math.isnan(ll_valid):
            print("NaN detected. Reverting to saved best parameters")
            ndict.set_value(model.v, ndict.loadz(logdir + 'v.ndict.tar.gz'))
            ndict.set_value(model.w, ndict.loadz(logdir + 'w.ndict.tar.gz'))
            return

        if ll_valid > ll_valid_stats[0]:
            ll_valid_stats[0] = ll_valid
            ll_valid_stats[1] = 0
            ndict.savez(ndict.get_value(model.v), logdir + 'v_best')
            ndict.savez(ndict.get_value(model.w), logdir + 'w_best')
        else:
            ll_valid_stats[1] += 1
            # Early stopping is disabled here (`if False`); if enabled it
            # would trigger after 1000 non-improving validation checks
            if False and ll_valid_stats[1] > 1000:
                print("Finished")
                with open(logdir + 'hook.txt', 'a') as f:
                    print("Finished", file=f)
                exit()

        # Log
        ndict.savez(ndict.get_value(model.v), logdir + 'v')
        ndict.savez(ndict.get_value(model.w), logdir + 'w')
        print(epoch, t, ll, ll_valid)
        with open(logdir + 'hook.txt', 'a') as f:
            print(t, ll, ll_valid, file=f)

        if gfx:
            # Graphics

            v = {i: model.v[i].get_value() for i in model.v}
            w = {i: model.w[i].get_value() for i in model.w}

            tail = '-' + str(epoch) + '.png'

            image = paramgraphics.mat_to_img(f_dec(v['w0x'][:].T),
                                             dim_input,
                                             True,
                                             colorImg=colorImg)
            image.save(logdir + 'q_w0x' + tail, 'PNG')

            image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]),
                                             dim_input,
                                             True,
                                             colorImg=colorImg)
            image.save(logdir + 'out_w' + tail, 'PNG')

            _x = {'y': np.random.multinomial(1, [1. / n_y] * n_y, size=144).T}
            _, _, _z_confab = model.gen_xz(_x, {}, n_batch=144)
            image = paramgraphics.mat_to_img(f_dec(_z_confab['x']),
                                             dim_input,
                                             colorImg=colorImg)
            image.save(logdir + 'samples' + tail, 'PNG')

            _, _, _z_confab = model.gen_xz(y_sample, z_sample, n_batch=144)
            image = paramgraphics.mat_to_img(f_dec(_z_confab['x']),
                                             dim_input,
                                             colorImg=colorImg)
            image.save(logdir + 'samples_fixed' + tail, 'PNG')

            if n_z == 2:

                from PIL import Image
                from PIL import ImageFont
                from PIL import ImageDraw

                n_width = 10
                submosaic_offset = 15
                submosaic_width = (dim_input[1] * n_width)
                submosaic_height = (dim_input[0] * n_width)
                mosaic = Image.new(
                    "RGB", (submosaic_width * mosaic_w,
                            submosaic_offset + submosaic_height * mosaic_h))

                for digit in range(0, n_y):
                    if digit >= mosaic_h * mosaic_w: continue

                    _x = {}
                    n_batch_plot = n_width * n_width
                    _x['y'] = np.zeros((n_y, n_batch_plot))
                    _x['y'][digit, :] = 1
                    _z = {'z': np.zeros((2, n_width**2))}
                    for i in range(0, n_width):
                        for j in range(0, n_width):
                            _z['z'][0, n_width * i + j] = scipy.stats.norm.ppf(
                                float(i) / n_width + 0.5 / n_width)
                            _z['z'][1, n_width * i + j] = scipy.stats.norm.ppf(
                                float(j) / n_width + 0.5 / n_width)

                    _x, _, _z_confab = model.gen_xz(_x,
                                                    _z,
                                                    n_batch=n_batch_plot)
                    x_samples = _z_confab['x']
                    image = paramgraphics.mat_to_img(f_dec(x_samples),
                                                     dim_input,
                                                     colorImg=colorImg,
                                                     tile_spacing=(0, 0))

                    #image.save(logdir+'samples_digit_'+str(digit)+'_'+tail, 'PNG')
                    mosaic_x = (digit % mosaic_w) * submosaic_width
                    mosaic_y = submosaic_offset + int(
                        digit / mosaic_w) * submosaic_height
                    mosaic.paste(image, (mosaic_x, mosaic_y))

                draw = ImageDraw.Draw(mosaic)
                draw.text((1, 1),
                          "Epoch #" + str(epoch) + " Loss=" + str(int(ll)))

                #plt.savefig(logdir+'mosaic'+tail, format='PNG')
                mosaic.save(logdir + 'mosaic' + tail, 'PNG')

                #x_samples = _x['x']
                #image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg)
                #image.save(logdir+'samples2'+tail, 'PNG')

    # Optimize
    dostep = epoch_vae_adam(model,
                            x,
                            n_batch=n_batch,
                            bernoulli_x=bernoulli_x,
                            byteToFloat=byteToFloat)
    loop_va(dostep, hook)

    pass
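
Both listings build their 2D-manifold figures with the same trick: lay an evenly spaced grid of quantiles over the unit square and push it through the inverse CDF of the standard normal, so the grid covers latent space in proportion to the prior p(z) = N(0, I). A standalone sketch of just that step (assumes only numpy and scipy; Python 3):

import numpy as np
import scipy.stats

n_width = 10
z = np.zeros((2, n_width ** 2))
for i in range(n_width):
    for j in range(n_width):
        # (i + 0.5) / n_width walks the bin centers of (0, 1);
        # norm.ppf maps them to standard-normal quantiles
        z[0, n_width * i + j] = scipy.stats.norm.ppf((i + 0.5) / n_width)
        z[1, n_width * i + j] = scipy.stats.norm.ppf((j + 0.5) / n_width)

# The grid can then be decoded, e.g. model.gen_xz({}, {'z': z}, n_width ** 2).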