def PCA_fromfile(fname, toFloat=False):
    # Reconstruct the PCA transform from parameters previously saved with ndict.savez.
    pca = ndict.loadz(fname)
    return PCA_encdec(pca['eigvec'], pca['eigval'], pca['x_center'], pca['x_sd'], toFloat)
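# Usage sketch (not part of the original source). Elsewhere in this file, pp.PCA returns
# (f_enc, f_dec, pca_params) and the params are stored with ndict.savez; PCA_fromfile is
# assumed to rebuild the same encoder/decoder pair from such an archive, and the
# '.ndict.tar.gz' extension is assumed to match the archives loaded elsewhere here.
#
#   f_enc, f_dec, pca_params = pp.PCA(train_x[:, :10000], cutoff=300, toFloat=True)
#   ndict.savez(pca_params, logdir + 'pca_params')
#   ...
#   f_enc, f_dec = PCA_fromfile(logdir + 'pca_params.ndict.tar.gz', toFloat=True)
#   x_reduced = f_enc(train_x)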
def main(n_z, n_hidden, dataset, seed, comment, gfx=True): # Initialize logdir import time pre_dir = 'models/gpulearn_z_x_mnist_96-(500, 500)' if os.environ.has_key('pretrain') and bool(int(os.environ['pretrain'])) == True: comment+='_pre-train' if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True: comment+='_prior' pre_dir+='_prior' if os.environ.has_key('cutoff'): comment+=('_'+str(int(os.environ['cutoff']))) if os.environ.has_key('train_residual') and bool(int(os.environ['train_residual'])) == True: comment+='_train-residual' pre_dir+='_train-residual' if os.environ.has_key('sigma_square'): comment+=('_'+str(float(os.environ['sigma_square']))) pre_dir+=('_'+str(float(os.environ['sigma_square']))) pre_dir+='/' logdir = 'results/gpulearn_z_x_'+dataset+'_'+str(n_z)+'-'+str(n_hidden)+comment+'_'+str(int(time.time()))+'/' if not os.path.exists(logdir): os.makedirs(logdir) print 'logdir:', logdir print 'gpulearn_z_x', n_z, n_hidden, dataset, seed with open(logdir+'hook.txt', 'a') as f: print >>f, 'learn_z_x', n_z, n_hidden, dataset, seed np.random.seed(seed) gfx_freq = 1 weight_decay = 0 # Init data if dataset == 'mnist': import anglepy.data.mnist as mnist # MNIST size = 28 train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(size) f_enc, f_dec = pp.Identity() if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True: color.printBlue('Loading prior') mnist_prior = sio.loadmat('data/mnist_prior/mnist_prior.mat') train_mean_prior = mnist_prior['z_train'] test_mean_prior = mnist_prior['z_test'] valid_mean_prior = mnist_prior['z_valid'] else: train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) print '---------------------', type(train_x) x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)} print '---------------------', type(x_train) L_valid = 1 dim_input = (size,size) n_x = size*size type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 50000 n_test = 10000 n_valid = 10000 n_batch = 1000 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'higgs': size = 28 f_enc, f_dec = pp.Identity() inputfile = 'data/higgs/HIGGS.csv' print 'loading file.' x = np.loadtxt(inputfile, dtype='f4', delimiter=',') print 'done.' y = x[:,0].reshape((-1,1)) x = x[:,1:] x = np.array(x, dtype='float32') y = np.array(y, dtype='float32') n_train = 10000000 n_valid = 500000 n_test = 500000 n_batch = 1000 derived_feat = 'all' if os.environ.has_key('derived_feat'): derived_feat = os.environ['derived_feat'] color.printBlue(derived_feat) if derived_feat == 'high': # Only the 7 high level features. x = x[:, 21:28] elif derived_feat == 'low': # Only the 21 raw features. 
x = x[:, 0:21] else: pass train_x = x[0:n_train, :].T y_train = y[0:n_train, :] valid_x = x[n_train:n_train+n_valid, :].T y_valid = y[n_train:n_train+n_valid, :] test_x = x[n_train+n_valid:n_train+n_valid+n_test, :].T y_test = y[n_train+n_valid:n_train+n_valid+n_test, :] n_y = 2 n_x = train_x.shape[0] train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)} type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'tanh' if os.environ.has_key('nonlinear'): nonlinear = os.environ['nonlinear'] color.printBlue(nonlinear) L_valid = 1 dim_input = (1,size) type_px = 'gaussian' colorImg = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'cifar10': import anglepy.data.cifar10 as cifar10 size = 32 train_x, train_y, test_x, test_y = cifar10.load_numpy() train_x = train_x.astype(np.float32).T test_x = test_x.astype(np.float32).T ## f_enc, f_dec = pp.Identity() if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True: color.printBlue('Loading prior') cifar_prior = sio.loadmat('data/cifar10_prior/cifar10_prior.mat') train_mean_prior = cifar_prior['z_train'] test_mean_prior = cifar_prior['z_test'] else: train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)} x_train = x x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)} x_valid = x_test L_valid = 1 n_y = 10 dim_input = (size,size) n_x = x['x'].shape[0] type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'gaussian' if os.environ.has_key('type_px'): type_px = os.environ['type_px'] color.printBlue('Generative type: '+type_px) n_train = 50000 n_test = 10000 n_batch = 5000 colorImg = True bernoulli_x = False byteToFloat = False #weight_decay = float(n_batch)/n_train elif dataset == 'cifar10_zca': import anglepy.data.cifar10 as cifar10 size = 32 train_x, train_y, test_x, test_y = cifar10.load_numpy() train_x = train_x.astype(np.float32).T test_x = test_x.astype(np.float32).T ## f_enc, f_dec = pp.Identity() zca_mean, zca_w, zca_winv = cifar10.zca(train_x) train_x = zca_w.dot(train_x-zca_mean) test_x = zca_w.dot(test_x-zca_mean) if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True: color.printBlue('Loading prior') cifar_prior = sio.loadmat('data/cifar10_prior/cifar10_prior.mat') train_mean_prior = cifar_prior['z_train'] test_mean_prior = cifar_prior['z_test'] else: train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)} x_train = x x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)} x_valid = x_test L_valid = 1 dim_input = (size,size) n_y = 10 n_x = x['x'].shape[0] type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'gaussian' n_train = 50000 n_test = 10000 n_batch = 5000 colorImg = True bernoulli_x = False byteToFloat = False if os.environ.has_key('type_px'): type_px = 
os.environ['type_px'] color.printBlue('Generative type: '+type_px) nonlinear = 'softplus' elif dataset == 'mnist_basic': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_' tmp = sio.loadmat(data_dir+'train.mat') #color.printRed(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,10000:] valid_y = train_y[10000:] train_x = train_x[:,:10000] train_y = train_y[:10000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) ''' x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} ''' x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 10 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 10000 n_valid = 2000 n_test = 50000 n_batch = 200 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'rectangle': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'rectangles_' tmp = sio.loadmat(data_dir+'train.mat') color.printRed(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,1000:] valid_y = train_y[1000:] train_x = train_x[:,:1000] train_y = train_y[:1000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) ''' x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} ''' x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 2 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 1000 n_valid = 200 n_test = 50000 n_batch = 500 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train #print '3', n_x elif dataset == 'convex': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'convex_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T 
train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,6000:] valid_y = train_y[6000:] train_x = train_x[:,:6000] train_y = train_y[:6000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) ''' x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} ''' x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 2 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 6000 n_valid = 2000 n_test = 50000 n_batch = 120 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'rectangle_image': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'rectangles_im_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,10000:] valid_y = train_y[10000:] train_x = train_x[:,:10000] train_y = train_y[:10000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) ''' x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} ''' x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 2 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 10000 n_valid = 2000 n_test = 50000 n_batch = 200 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'mnist_rot': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_all_rotation_normalized_float_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,10000:] valid_y = train_y[10000:] train_x = train_x[:,:10000] train_y = train_y[:10000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print 
test_x.shape print test_y.shape train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) f_enc, f_dec = pp.Identity() x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 10 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 10000 n_valid = 2000 n_test = 50000 n_batch = 200 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'mnist_back_rand': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_background_random_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,10000:] valid_y = train_y[10000:] train_x = train_x[:,:10000] train_y = train_y[:10000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) f_enc, f_dec = pp.Identity() x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 10 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 10000 n_valid = 2000 n_test = 50000 n_batch = 200 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'mnist_back_image': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_background_images_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,10000:] valid_y = train_y[10000:] train_x = train_x[:,:10000] train_y = train_y[:10000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) f_enc, f_dec = pp.Identity() x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 10 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 10000 n_valid = 2000 n_test = 50000 n_batch = 200 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'mnist_back_image_rot': # MNIST size = 28 
data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_all_background_images_rotation_normalized_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,10000:] valid_y = train_y[10000:] train_x = train_x[:,:10000] train_y = train_y[:10000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) f_enc, f_dec = pp.Identity() x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 10 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 10000 n_valid = 2000 n_test = 50000 n_batch = 200 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'mnist_binarized': #import anglepy.data.mnist_binarized as mnist_binarized # MNIST import anglepy.data.mnist as mnist size = 28 data_dir = '/home/lichongxuan/regbayes2/data/mat_data/'+'binarized_mnist_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T #train_y = tmp['t_train'].T.astype(np.int32) tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T tmp = sio.loadmat(data_dir+'valid.mat') #print tmp.keys() valid_x = tmp['x_valid'].T #test_y = tmp['t_test'].T.astype(np.int32) f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) train_x = np.hstack((train_x, valid_x)).astype(np.float32) train_mean_prior = np.hstack((train_mean_prior,valid_mean_prior)).astype(np.float32) print train_mean_prior.shape print train_x.shape x = {'x': train_x.astype(np.float32), 'mean_prior':train_mean_prior.astype(np.float32)} x_train = x x_valid = {'x': test_x.astype(np.float32),'mean_prior':test_mean_prior.astype(np.float32)} x_test = x_valid L_valid = 1 dim_input = (28,28) n_x = 28*28 n_y = 10 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 60000 n_valid = 10000 n_batch = 1000 colorImg = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'mnist_binarized_own': #import anglepy.data.mnist_binarized as mnist_binarized # MNIST import anglepy.data.mnist as mnist size = 28 data_dir = 'data/mnist_binarized_own/'+'binarized_mnist_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['train_x'].T #train_y = tmp['t_train'].T.astype(np.int32) tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['test_x'].T tmp = sio.loadmat(data_dir+'valid.mat') #print tmp.keys() valid_x = tmp['valid_x'].T #test_y = tmp['t_test'].T.astype(np.int32) f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) train_x = np.hstack((train_x, valid_x)).astype(np.float32) train_mean_prior = np.hstack((train_mean_prior,valid_mean_prior)).astype(np.float32) print 
train_mean_prior.shape print train_x.shape x = {'x': train_x.astype(np.float32), 'mean_prior':train_mean_prior.astype(np.float32)} x_train = x x_valid = {'x': test_x.astype(np.float32),'mean_prior':test_mean_prior.astype(np.float32)} x_test = x_valid L_valid = 1 dim_input = (28,28) n_x = 28*28 n_y = 10 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 60000 n_valid = 10000 n_batch = 1000 colorImg = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'freyface': # Frey's face import anglepy.data.freyface as freyface n_train = 1600 train_x = freyface.load_numpy() np.random.shuffle(train_x) x = {'x': train_x.T[:,0:n_train]} x_valid = {'x': train_x.T[:,n_train:]} L_valid = 1 dim_input = (28,20) n_x = 20*28 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'bounded01' nonlinear = 'tanh' #tanh works better with freyface #'softplus' n_batch = 100 colorImg = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'freyface_pca': # Frey's face import anglepy.data.freyface as freyface n_train = 1600 train_x = freyface.load_numpy().T np.random.shuffle(train_x.T) f_enc, f_dec, _ = pp.PCA(train_x, 0.99) train_x = f_enc(train_x) x = {'x': train_x[:,0:n_train].astype(np.float32)} x_valid = {'x': train_x[:,n_train:].astype(np.float32)} L_valid = 1 dim_input = (28,20) n_x = train_x.shape[0] type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 100 colorImg = False bernoulli_x = False byteToFloat = False elif dataset == 'freyface_bernoulli': # Frey's face import anglepy.data.freyface as freyface n_train = 1600 train_x = freyface.load_numpy().T np.random.shuffle(train_x.T) x = {'x': train_x[:,0:n_train].astype(np.float32)} x_valid = {'x': train_x[:,n_train:].astype(np.float32)} L_valid = 1 dim_input = (28,20) n_x = train_x.shape[0] type_pz = 'gaussianmarg' type_px = 'bernoulli' nonlinear = 'softplus' n_batch = 100 colorImg = False bernoulli_x = False byteToFloat = False elif dataset == 'norb_48_24300_pca': size = 48 train_x, train_y, test_x, test_y = np.load('data/norb/norb_48_24300.npy') _x = {'x': train_x, 'y': train_y} #ndict.shuffleCols(_x) #train_x = _x['x'] #train_y = _x['y'] #print _x['x'][:,:10000].shape # Do PCA print 'pca' f_enc, f_dec, pca_params = pp.PCA(_x['x'][:,:10000], cutoff=500, toFloat=False) ndict.savez(pca_params, logdir+'pca_params') print 'done' train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) x = {'x': f_enc(train_x).astype(np.float32), 'mean_prior' : train_mean_prior.astype(np.float32)} x_valid = {'x': f_enc(test_x).astype(np.float32), 'mean_prior' : test_mean_prior.astype(np.float32)} x_test = {'x': f_enc(test_x).astype(np.float32), 'mean_prior' : test_mean_prior.astype(np.float32)} x_train = x print x['x'].shape print x['mean_prior'].shape L_valid = 1 n_y = 5 n_x = x['x'].shape[0] dim_input = (size,size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 900 #23400/900 = 27 colorImg = False #binarize = False bernoulli_x = False byteToFloat = False weight_decay= float(n_batch)/train_x.shape[1] elif dataset == 'norb': # small NORB dataset import anglepy.data.norb as norb size = 48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) x = {'x': train_x.astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 n_x = train_x.shape[0] 
dim_input = (size,size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 900 #23400/900 = 27 colorImg = False #binarize = False byteToFloat = False bernoulli_x = False weight_decay= float(n_batch)/train_x.shape[1] elif dataset == 'norb_pca': # small NORB dataset import anglepy.data.norb as norb size = 48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) f_enc, f_dec, _ = pp.PCA(train_x, 0.999) #f_enc, f_dec, _ = pp.normalize_random(train_x) train_x = f_enc(train_x) test_x = f_enc(test_x) x = {'x': train_x.astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 n_x = train_x.shape[0] dim_input = (size,size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 900 #23400/900 = 27 colorImg = False #binarize = False bernoulli_x = False byteToFloat = False weight_decay= float(n_batch)/train_x.shape[1] elif dataset == 'norb_normalized': # small NORB dataset import anglepy.data.norb as norb size = 48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) #f_enc, f_dec, _ = pp.PCA(train_x, 0.99) #f_enc, f_dec, _ = pp.normalize_random(train_x) f_enc, f_dec, _ = pp.normalize(train_x) train_x = f_enc(train_x) test_x = f_enc(test_x) x = {'x': train_x.astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 n_x = train_x.shape[0] dim_input = (size,size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 900 #23400/900 = 27 colorImg = False #binarize = False bernoulli_x = False byteToFloat = False weight_decay= float(n_batch)/train_x.shape[1] elif dataset == 'svhn': # SVHN dataset #import anglepy.data.svhn as svhn size = 32 train_x, train_y, test_x, test_y = np.load('data/svhn/svhn.npy') #extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True) #x = {'x': np.hstack((train_x, extra_x)), 'y':np.hstack((train_y, extra_y))} #ndict.shuffleCols(x) x = {'x' : train_x, 'y': train_y} print 'Performing PCA, can take a few minutes... ' cutoff = 300 if os.environ.has_key('cutoff'): cutoff = int(os.environ['cutoff']) color.printBlue('cutoff: '+str(cutoff)) f_enc, f_dec, pca_params = pp.PCA(x['x'][:,:10000], cutoff=cutoff, toFloat=True) ndict.savez(pca_params, logdir+'pca_params') print 'Done.' 
n_y = 10 if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True: color.printBlue('Loading prior') train_mean_prior, train_y1, test_mean_prior, test_y1 = np.load('data/svhn/svhn_prior.npy') print np.sum((train_y1 == train_y).astype(np.int32)) print np.sum((test_y1 == test_y).astype(np.int32)) else: train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) x = {'x': f_enc(x['x']).astype(np.float32), 'mean_prior':train_mean_prior.astype(np.float32)} x_train = x x_test = {'x': f_enc(test_x).astype(np.float32), 'mean_prior':test_mean_prior.astype(np.float32)} x_valid = x_test print x_train['x'].shape print x_test['x'].shape print train_y.shape print test_y.shape print x_train['mean_prior'].shape print x_test['mean_prior'].shape L_valid = 1 n_x = x['x'].shape[0] dim_input = (size,size) n_batch = 5000 n_train = 604388 n_valid = 26032 n_test = 26032 colorImg = True bernoulli_x = False byteToFloat = False type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' else: print 'invalid data set' exit() #print '2', n_x # Construct model from anglepy.models import GPUVAE_Z_X learning_rate1 = 3e-4 if os.environ.has_key('stepsize'): learning_rate1 = float(os.environ['stepsize']) color.printBlue(str(learning_rate1)) if os.environ.has_key('preoption'): pre = int(os.environ['preoption']) if pre == 1: updates = get_adam_optimizer(learning_rate=3e-4, decay1=0.9, decay2=0.999, weight_decay=0) elif pre ==2: updates = get_adam_optimizer(learning_rate=3e-4, decay1=0.9, decay2=0.999, weight_decay=weight_decay) else: raise Exception('Prepotion unknown') with open(logdir+'hook.txt', 'a') as f: print >>f, 'preoption ' + str(pre) else: updates = get_adam_optimizer(learning_rate=learning_rate1, weight_decay=weight_decay) #print '1', n_x model = GPUVAE_Z_X(updates, n_x, n_hidden, n_z, n_hidden[::-1], nonlinear, nonlinear, type_px, type_qz=type_qz, type_pz=type_pz, prior_sd=100, init_sd=1e-3) if os.environ.has_key('pretrain') and bool(int(os.environ['pretrain'])) == True: #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412689061/' #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412676966/' #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412695481/' #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412695455/' #dir = '/Users/dpkingma/results/gpulearn_z_x_svhn_pca_300-(500, 500)__1413904756/' if len(n_hidden) == 1: color.printBlue('pre-training-1-layer') layer_str = '-500' elif len(n_hidden) == 2: color.printBlue('pre-training-2-layers') layer_str = '-(500, 500)' else: raise Exception() pre_str = 'models/gpulearn_z_x_' if dataset == 'mnist': #dir = pre_str + 'mnist_'+str(n_z)+layer_str+'_longrun/' dir = 'models/mnist_z_x_50-500-500_longrun/' elif dataset == 'mnist_rot': dir = pre_str + 'mnist_rot_'+str(n_z)+layer_str+'_longrun/' elif dataset == 'mnist_back_rand': dir = pre_str + 'mnist_back_rand_'+str(n_z)+layer_str+'_longrun/' elif dataset == 'mnist_back_image': dir = pre_str + 'mnist_back_image_'+str(n_z)+layer_str+'_longrun/' elif dataset == 'mnist_back_image_rot': dir = pre_str + 'mnist_back_image_rot_'+str(n_z)+layer_str+'_longrun/' elif dataset == 'rectangle': dir = pre_str + 'rectangle_'+str(n_z)+layer_str+'_longrun/' elif dataset == 'rectangle_image': dir = pre_str + 'rectangle_image_'+str(n_z)+layer_str+'_longrun/' elif dataset == 'convex': dir = pre_str + 
'convex_'+str(n_z)+layer_str+'_longrun/' elif dataset == 'mnist_basic': dir = pre_str + 'mnist_basic_'+str(n_z)+layer_str+'_longrun/' if dataset == 'svhn': if (os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True): print 'prior-------------------' pre_dir = 'results/gpulearn_z_x_svhn_'+str(n_z)+'-500-500_prior_'+str(cutoff)+'_longrun/' else: pre_dir = 'results/gpulearn_z_x_svhn_'+str(n_z)+'-500-500_'+str(cutoff)+'_longrun/' color.printBlue(pre_dir) w = ndict.loadz(pre_dir+'w_best.ndict.tar.gz') v = ndict.loadz(pre_dir+'v_best.ndict.tar.gz') elif n_z == 50: print 'n_z = 50', dir w = ndict.loadz(dir+'w_best.ndict.tar.gz') v = ndict.loadz(dir+'v_best.ndict.tar.gz') else: print 'n_z != 50' w = ndict.loadz(pre_dir+'w_best.ndict.tar.gz') v = ndict.loadz(pre_dir+'v_best.ndict.tar.gz') ndict.set_value2(model.w, w) ndict.set_value2(model.v, v) # Some statistics for optimization ll_valid_stats = [-1e99, 0] # Progress hook def hook(epoch, t, ll): if epoch%10 != 0: return n_batch_n = n_batch if n_batch_n > n_valid: n_batch_n = n_valid ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch_n, byteToFloat=byteToFloat) ll_test = ll_valid #if not dataset == 'mnist_binarized': if not dataset == 'svhn': ll_test, _ = model.est_loglik(x_test, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat) # Log ndict.savez(ndict.get_value(model.v), logdir+'v') ndict.savez(ndict.get_value(model.w), logdir+'w') def infer(data, n_batch=1000): #print '--', n_batch size = data['x'].shape[1] res = np.zeros((sum(n_hidden), size)) res1 = np.zeros((n_z,size)) res2 = np.zeros((n_hidden[-1],size)) res3 = np.zeros((n_z,size)) for i in range(0, size, n_batch): idx_to = min(size, i+n_batch) x_batch = ndict.getCols(data, i, idx_to) # may have bugs nn_batch = idx_to - i _x, _z, _z_confab = model.gen_xz(x_batch, {}, nn_batch) x_samples = _z_confab['x'] for (hi, hidden) in enumerate(_z_confab['hidden']): res[sum(n_hidden[:hi]):sum(n_hidden[:hi+1]),i:i+nn_batch] = hidden res1[:,i:i+nn_batch] = _z_confab['mean'] res2[:,i:i+nn_batch] = _z_confab['hidden'][-1] res3[:,i:i+nn_batch] = _z_confab['logvar'] #print '--' return res, res1, res2, res3 #print '..', n_batch #if not dataset == 'mnist_binarized': if not dataset == 'svhn': z_test, z_test1, z_test2, vv_test = infer(x_test) z_train, z_train1, z_train2, vv_train = infer(x_train) if ll_valid > ll_valid_stats[0]: ll_valid_stats[0] = ll_valid ll_valid_stats[1] = 0 ndict.savez(ndict.get_value(model.v), logdir+'v_best') ndict.savez(ndict.get_value(model.w), logdir+'w_best') #if not dataset == 'mnist_binarized': if dataset == 'svhn': pass #np.save(logdir+'full_latent', ('z_test': z_test, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train)) #np.save(logdir+'last_latent', ('z_test': z_test2, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train2)) else: sio.savemat(logdir+'full_latent.mat', {'z_test': z_test, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train}) sio.savemat(logdir+'mean_latent.mat', {'z_test': z_test1, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train1}) sio.savemat(logdir+'last_latent.mat', {'z_test': z_test2, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train2}) else: ll_valid_stats[1] += 1 # Stop when not improving validation set performance in 100 iterations if ll_valid_stats[1] > 1000: print "Finished" with open(logdir+'hook.txt', 'a') as f: print >>f, "Finished" exit() print epoch, t, ll, ll_valid, ll_test, ll_valid_stats with open(logdir+'hook.txt', 'a') as f: print >>f, epoch, t, ll, ll_valid, ll_test, 
ll_valid_stats ''' if dataset != 'svhn': l_t, px_t, pz_t, qz_t = model.test(x_train, n_samples=1, n_batch=n_batch, byteToFloat=byteToFloat) print 'Elogpx', px_t, 'Elogpz', pz_t, '-Elogqz', qz_t #sigma_square = float(os.environ['sigma_square']) print 'var', np.mean(np.exp(vv_train)), 'q', np.mean(np.abs(z_train1)), 'p', np.mean(np.abs(train_mean_prior)), 'd', np.mean(np.abs(z_train1-train_mean_prior)) with open(logdir+'hook.txt', 'a') as f: print >>f, 'Elogpx', px_t, 'Elogpz', pz_t, '-Elogqz', qz_t print >>f, 'var', np.mean(np.exp(vv_train)), 'q', np.mean(np.abs(z_train1)), 'p', np.mean(np.abs(train_mean_prior)), 'd', np.mean(np.abs(z_train1-train_mean_prior)) ''' # Graphics if gfx and epoch%gfx_freq == 0: #tail = '.png' tail = '-'+str(epoch)+'.png' v = {i: model.v[i].get_value() for i in model.v} w = {i: model.w[i].get_value() for i in model.w} if 'pca' not in dataset and 'random' not in dataset and 'normalized' not in dataset and 'zca' not in dataset: if 'w0' in v: image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg) image.save(logdir+'q_w0'+tail, 'PNG') image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg) image.save(logdir+'out_w'+tail, 'PNG') if 'out_unif' in w: image = paramgraphics.mat_to_img(f_dec(w['out_unif'].reshape((-1,1))), dim_input, True, colorImg=colorImg) image.save(logdir+'out_unif'+tail, 'PNG') if n_z == 2: n_width = 10 import scipy.stats z = {'z':np.zeros((2,n_width**2))} for i in range(0,n_width): for j in range(0,n_width): z['z'][0,n_width*i+j] = scipy.stats.norm.ppf(float(i)/n_width+0.5/n_width) z['z'][1,n_width*i+j] = scipy.stats.norm.ppf(float(j)/n_width+0.5/n_width) x, _, _z = model.gen_xz({}, z, n_width**2) if dataset == 'mnist': x = 1 - _z['x'] image = paramgraphics.mat_to_img(f_dec(_z['x']), dim_input) image.save(logdir+'2dmanifold'+tail, 'PNG') else: if 'norb' in dataset or dataset=='svhn': nn_batch_nn = 64 else: nn_batch_nn = 144 if not(os.environ.has_key('train_residual') and bool(int(os.environ['train_residual'])) == True) and (os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True): mp_in = np.random.randint(0,x_train['mean_prior'].shape[1],nn_batch_nn) m_p = x_train['mean_prior'][:,mp_in] s_s = 1 if os.environ.has_key('sigma_square'): s_s = float(os.environ['sigma_square']) x_samples = model.gen_xz_prior({}, {}, m_p, s_s, n_batch=nn_batch_nn) x_samples = x_samples['x'] m_p1 = (np.ones((n_z, nn_batch_nn)).T * np.mean(x_train['mean_prior'], axis = 1)).T x_samples1 = model.gen_xz_prior({}, {}, m_p1.astype(np.float32), s_s, n_batch=nn_batch_nn) image = paramgraphics.mat_to_img(f_dec(x_samples1['x']), dim_input, colorImg=colorImg) image.save(logdir+'mean_samples-prior'+tail, 'PNG') x_samples11 = model.gen_xz_prior11({}, {}, m_p, s_s, n_batch=nn_batch_nn) image = paramgraphics.mat_to_img(f_dec(x_samples11['x']), dim_input, colorImg=colorImg) image.save(logdir+'prior-image'+tail, 'PNG') else: _x, _, _z_confab = model.gen_xz({}, {}, n_batch=nn_batch_nn) x_samples = _z_confab['x'] image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg) image.save(logdir+'samples-prior'+tail, 'PNG') #x_samples = _x['x'] #image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg) #image.save(logdir+'samples2'+tail, 'PNG') else: # Model with preprocessing if 'w0' in v: tmp = f_dec(v['w0'][:].T) #print dim_input #print tmp.shape if 'zca' in dataset or dataset=='svhn': tmp = zca_dec(zca_mean, zca_winv, tmp) image = paramgraphics.mat_to_img(tmp, dim_input, 
True, colorImg=colorImg) image.save(logdir+'q_w0'+tail, 'PNG') tmp = f_dec(w['out_w'][:]) if 'zca' in dataset: tmp = zca_dec(zca_mean, zca_winv, tmp) image = paramgraphics.mat_to_img(tmp, dim_input, True, colorImg=colorImg) image.save(logdir+'out_w'+tail, 'PNG') if dataset == 'svhn': nn_batch_nn = 64 else: nn_batch_nn = 144 if not(os.environ.has_key('train_residual') and bool(int(os.environ['train_residual'])) == True) and (os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True): mp_in = np.random.randint(0,x_train['mean_prior'].shape[1],nn_batch_nn) m_p = x_train['mean_prior'][:,mp_in] s_s = 1 if os.environ.has_key('sigma_square'): s_s = float(os.environ['sigma_square']) x_samples = model.gen_xz_prior({}, {}, m_p, s_s, n_batch=nn_batch_nn) x_samples = zca_dec(zca_mean, zca_winv,x_samples['x']) x_samples = np.minimum(np.maximum(x_samples, 0), 1) x_samples11 = model.gen_xz_prior11({}, {}, m_p, s_s, n_batch=nn_batch_nn) x_samples11 = zca_dec(zca_mean,zca_winv,x_samples11['x']) x_samples11 = np.minimum(np.maximum(x_samples11, 0), 1) image = paramgraphics.mat_to_img(x_samples11, dim_input, colorImg=colorImg) image.save(logdir+'prior-image'+tail, 'PNG') else: _x, _z, _z_confab = model.gen_xz({}, {}, n_batch=nn_batch_nn) x_samples = f_dec(_z_confab['x']) x_samples = np.minimum(np.maximum(x_samples, 0), 1) image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg) image.save(logdir+'samples'+tail, 'PNG') ''' def infer(data, n_batch=1000): #print '--', n_batch size = data['x'].shape[1] res = np.zeros((sum(n_hidden), size)) res1 = np.zeros((n_z,size)) res2 = np.zeros((n_hidden[-1],size)) res3 = np.zeros((n_z,size)) for i in range(0, size, n_batch): idx_to = min(size, i+n_batch) x_batch = ndict.getCols(data, i, idx_to) # may have bugs nn_batch = idx_to - i _x, _z, _z_confab = model.gen_xz(x_batch, {}, nn_batch) x_samples = _z_confab['x'] for (hi, hidden) in enumerate(_z_confab['hidden']): res[sum(n_hidden[:hi]):sum(n_hidden[:hi+1]),i:i+nn_batch] = hidden res1[:,i:i+nn_batch] = _z_confab['mean'] res2[:,i:i+nn_batch] = _z_confab['hidden'][-1] res3[:,i:i+nn_batch] = _z_confab['logvar'] # return res, res1, res2, res3 #print n_batch #if not dataset == 'mnist_binarized': z_test, z_test1, z_test2, vv_test = infer(x_test) z_train, z_train1, z_train2, vv_train = infer(x_train) l_t, px_t, pz_t, qz_t = model.test(x_train, n_samples=1, n_batch=n_batch, byteToFloat=byteToFloat) print 'Elogpx', px_t, 'Elogpz', pz_t, '-Elogqz', qz_t #sigma_square = float(os.environ['sigma_square']) print 'var', np.mean(np.exp(vv_train)), 'q', np.mean(np.abs(z_train1)), 'p', np.mean(np.abs(train_mean_prior)), 'd', np.mean(np.abs(z_train1-train_mean_prior)) with open(logdir+'hook.txt', 'a') as f: print >>f, 'Elogpx', px_t, 'Elogpz', pz_t, '-Elogqz', qz_t print >>f, 'var', np.mean(np.exp(vv_train)), 'q', np.mean(np.abs(z_train1)), 'p', np.mean(np.abs(train_mean_prior)), 'd', np.mean(np.abs(z_train1-train_mean_prior)) #if not dataset == 'mnist_binarized': sio.savemat(logdir+'full_latent.mat', {'z_test': z_test, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train}) sio.savemat(logdir+'mean_latent.mat', {'z_test': z_test1, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train1}) sio.savemat(logdir+'last_latent.mat', {'z_test': z_test2, 'train_y':train_y, 'test_y':test_y, 'z_train': z_train2}) ''' # Optimize #SFO dostep = epoch_vae_adam(model, x, n_batch=n_batch, bernoulli_x=bernoulli_x, byteToFloat=byteToFloat) loop_va(dostep, hook) pass
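# Example invocation (sketch only; the command-line wrapper that parses sys.argv is not
# part of this listing, and the values below are illustrative, not the paper settings).
# Behaviour is additionally controlled through environment variables read above, e.g.
# 'prior', 'pretrain', 'train_residual', 'cutoff', 'sigma_square', 'stepsize',
# 'nonlinear' and 'type_px'.
#
#   os.environ['prior'] = '1'           # use a precomputed latent prior as 'mean_prior'
#   os.environ['sigma_square'] = '1.0'  # variance passed to gen_xz_prior when sampling
#   main(n_z=50, n_hidden=(500, 500), dataset='mnist', seed=0, comment='')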
def main(n_passes, n_hidden, seed, alpha, n_minibatches, n_unlabeled, n_classes):
    """Learn a variational auto-encoder with generative model p(x,y,z) = p(y)p(z)p(x|y,z),
    where 'x' is always observed and 'y' is only _sometimes_ observed (hence semi-supervised).
    q(y|x) is used as the classification model.
    """
    # Create the directory for the log and outputs.
    logdir = 'results/learn_yz_x_hyp' + '-' + str(int(time.time())) + '/'
    if not os.path.exists(logdir):
        os.makedirs(logdir)
    print("---------------")
    print('Logdir:', logdir)

    # Seed the random number generator.
    np.random.seed(seed)

    # Load the previously trained model (M1) used for feature extraction.
    path = 'results/hyper_50-(500, 500)_longrun/'
    l1_v = ndict.loadz(path + 'v_best.ndict.tar.gz')
    l1_w = ndict.loadz(path + 'w_best.ndict.tar.gz')

    # Same hyperparameters that were used for training M1:
    n_h = (500, 500)   # number of hidden nodes in the model
    n_x = 67 * 4       # size of the feature vector
    n_z = 50           # number of latent variables
    nonlinear = 'softplus'
    type_px = 'bernoulli'
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'

    # Create M1:
    from anglepy.models.VAE_Z_X import VAE_Z_X
    l1_model = VAE_Z_X(n_x=n_x, n_hidden_q=n_h, n_z=n_z, n_hidden_p=n_h,
                       nonlinear_q=nonlinear, nonlinear_p=nonlinear,
                       type_px=type_px, type_qz=type_qz, type_pz=type_pz, prior_sd=1)

    # Load dataset:
    from hyperspectralData import HyperspectralData
    x_l, y_l, x_u, y_u, valid_x, valid_y, test_x, test_y = \
        HyperspectralData().load_dataset_m2(n_unlabeled=n_unlabeled, n_classes=n_classes)

    # The number of labeled samples must be divisible by the number of minibatches,
    # so delete a few samples at random if necessary.
    n_labeled = x_l.shape[1]
    if n_labeled % n_minibatches != 0:
        indexes_to_delete = np.random.choice(range(n_labeled),
                                             size=(n_labeled % n_minibatches),
                                             replace=False)
        x_l = np.delete(x_l, indexes_to_delete, axis=1)
        y_l = np.delete(y_l, indexes_to_delete, axis=1)

    # Extract features with M1's recognition model q(z|x).
    def transform(v, _x):
        # Return the mean and log-variance of q(z|x) learned for the dataset.
        return l1_model.dist_qz['z'](*([_x] + list(v.values()) + [np.ones((1, _x.shape[1]))]))

    x_mean_u, x_logvar_u = transform(l1_v, x_u)
    x_mean_l, x_logvar_l = transform(l1_v, x_l)
    x_unlabeled = {'mean': x_mean_u, 'logvar': x_logvar_u, 'y': y_u}
    x_labeled = {'mean': x_mean_l, 'logvar': x_logvar_l, 'y': y_l}
    valid_x, _ = transform(l1_v, valid_x)
    test_x, _ = transform(l1_v, test_x)

    # Copied from learn_yz_x_ss:
    n_x = l1_w[b'w0'].shape[1]
    n_y = n_classes
    type_pz = 'gaussianmarg'
    type_px = 'gaussian'
    nonlinear = 'softplus'

    # Init VAE model p(x,y,z)
    from anglepy.models.VAE_YZ_X import VAE_YZ_X
    uniform_y = True
    model = VAE_YZ_X(n_x, n_y, n_hidden, n_z, n_hidden, nonlinear, nonlinear, type_px,
                     type_qz=type_qz, type_pz=type_pz, prior_sd=1, uniform_y=uniform_y)
    v, w = model.init_w(1e-3)

    # Init q(y|x) model
    from anglepy.models.MLP_Categorical import MLP_Categorical
    n_units = [n_x] + list(n_hidden) + [n_y]
    model_qy = MLP_Categorical(n_units=n_units, prior_sd=1, nonlinearity=nonlinear)
    u = model_qy.init_w(1e-3)

    write_headers(logdir)

    # Progress hook
    t0 = time.time()

    def hook(step, u, v, w, ll):
        print("---------------")
        print("Current results:")
        print("Step:", step)
        print(" ")

        # Classification error on the validation and test sets.
        def error(dataset_x, dataset_y):
            _, _, _z = model_qy.gen_xz(u, {'x': dataset_x}, {})
            n_examples = 20
            max_row = dataset_y.shape[1]
            example_rows = np.random.choice(max_row, size=n_examples, replace=False)
            print(" Predictions:", np.argmax(_z['py'], axis=0)[example_rows])
            print(" Real:        ", np.argmax(dataset_y, axis=0)[example_rows])
            return np.sum(np.argmax(_z['py'], axis=0) != np.argmax(dataset_y, axis=0)) \
                / (0.0 + dataset_y.shape[1])

        print("Validset:")
        valid_error = error(valid_x, valid_y)
        print("Testset:")
        test_error = error(test_x, test_y)

        # Save variables
        ndict.savez(u, logdir + 'u')
        ndict.savez(v, logdir + 'v')
        ndict.savez(w, logdir + 'w')

        # Show the current results and append them to a file:
        time_elapsed = time.time() - t0
        with open(logdir + 'AA_results.txt', 'a') as file:
            file.write(str(step) + ',' + str(time_elapsed) + ',' +
                       str(valid_error) + ',' + str(test_error) + '\n')
        print("Time elapsed:", time_elapsed)
        print("Validset error:", valid_error)
        print("Testset error:", test_error)
        print("LogLikelihood:", ll)

        return valid_error

    # Optimize
    result = optim_vae_ss_adam(alpha, model_qy, model, x_labeled, x_unlabeled, n_y,
                               u, v, w, n_minibatches=n_minibatches,
                               n_passes=n_passes, hook=hook)
    return result
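# Example invocation (sketch; argument values are illustrative, not taken from the
# original experiments). n_hidden is used for both the generative and recognition MLPs
# of the M2 model, and alpha is forwarded to optim_vae_ss_adam (presumably the weight on
# the classification objective, as in the original M1+M2 semi-supervised code). The
# labeled set is trimmed above so that its size is divisible by n_minibatches.
#
#   result = main(n_passes=500, n_hidden=(500, 500), seed=0, alpha=0.1,
#                 n_minibatches=100, n_unlabeled=30000, n_classes=10)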
def main(n_z, n_hidden, dataset, seed, comment, gfx=True): # Initialize logdir import time logdir = 'results/gpulearn_z_x_' + dataset + '_' + str(n_z) + '-' + str( n_hidden) + '_' + comment + '_' + str(int(time.time())) + '/' if not os.path.exists(logdir): os.makedirs(logdir) print 'logdir:', logdir print 'gpulearn_z_x', n_z, n_hidden, dataset, seed with open(logdir + 'hook.txt', 'a') as f: print >> f, 'learn_z_x', n_z, n_hidden, dataset, seed np.random.seed(seed) gfx_freq = 1 weight_decay = 0 f_enc, f_dec = lambda x: x, lambda x: x # Init data if dataset == 'mnist': import anglepy.data.mnist as mnist # MNIST size = 28 train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy( size) x = {'x': train_x.astype(np.float32)} x_valid = {'x': valid_x.astype(np.float32)} x_test = {'x': test_x.astype(np.float32)} L_valid = 1 dim_input = (size, size) n_x = size * size type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 50000 n_batch = 1000 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch) / n_train if dataset == 'mnist_binarized': import anglepy.data.mnist_binarized as mnist_binarized # MNIST train_x, valid_x, test_x = mnist_binarized.load_numpy(28) x = {'x': np.hstack((train_x, valid_x)).astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 dim_input = (28, 28) n_x = 28 * 28 n_y = 10 type_qz = 'gaussianmarg' type_pz = 'mog' nonlinear = 'rectlin' type_px = 'bernoulli' n_train = 60000 n_batch = 1000 colorImg = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch) / n_train elif dataset == 'freyface': # Frey's face import anglepy.data.freyface as freyface n_train = 1600 train_x = freyface.load_numpy() np.random.shuffle(train_x) x = {'x': train_x.T[:, 0:n_train]} x_valid = {'x': train_x.T[:, n_train:]} L_valid = 1 dim_input = (28, 20) n_x = 20 * 28 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'bounded01' nonlinear = 'tanh' #tanh works better with freyface #'softplus' n_batch = 100 colorImg = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch) / n_train elif dataset == 'freyface_pca': # Frey's face import anglepy.data.freyface as freyface n_train = 1600 train_x = freyface.load_numpy().T np.random.shuffle(train_x.T) f_enc, f_dec, _ = pp.PCA(train_x, 0.99) train_x = f_enc(train_x) x = {'x': train_x[:, 0:n_train].astype(np.float32)} x_valid = {'x': train_x[:, n_train:].astype(np.float32)} L_valid = 1 dim_input = (28, 20) n_x = train_x.shape[0] type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 100 colorImg = False bernoulli_x = False byteToFloat = False elif dataset == 'freyface_bernoulli': # Frey's face import anglepy.data.freyface as freyface n_train = 1600 train_x = freyface.load_numpy().T np.random.shuffle(train_x.T) x = {'x': train_x[:, 0:n_train].astype(np.float32)} x_valid = {'x': train_x[:, n_train:].astype(np.float32)} L_valid = 1 dim_input = (28, 20) n_x = train_x.shape[0] type_pz = 'gaussianmarg' type_px = 'bernoulli' nonlinear = 'softplus' n_batch = 100 colorImg = False bernoulli_x = False byteToFloat = False elif dataset == 'norb': # small NORB dataset import anglepy.data.norb as norb size = 48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) x = {'x': train_x.astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 n_x = train_x.shape[0] dim_input = (size, size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' 
type_px = 'gaussian' nonlinear = 'softplus' n_batch = 900 #23400/900 = 27 colorImg = False #binarize = False byteToFloat = False bernoulli_x = False weight_decay = float(n_batch) / train_x.shape[1] elif dataset == 'norb_pca': # small NORB dataset import anglepy.data.norb as norb size = 48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) f_enc, f_dec, _ = pp.PCA(train_x, 0.999) #f_enc, f_dec, _ = pp.normalize_random(train_x) train_x = f_enc(train_x) test_x = f_enc(test_x) x = {'x': train_x.astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 n_x = train_x.shape[0] dim_input = (size, size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 900 #23400/900 = 27 colorImg = False #binarize = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch) / train_x.shape[1] elif dataset == 'norb_normalized': # small NORB dataset import anglepy.data.norb as norb size = 48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) #f_enc, f_dec, _ = pp.PCA(train_x, 0.99) #f_enc, f_dec, _ = pp.normalize_random(train_x) f_enc, f_dec, _ = pp.normalize(train_x) train_x = f_enc(train_x) test_x = f_enc(test_x) x = {'x': train_x.astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 n_x = train_x.shape[0] dim_input = (size, size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 900 #23400/900 = 27 colorImg = False #binarize = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch) / train_x.shape[1] elif dataset == 'svhn': # SVHN dataset import anglepy.data.svhn as svhn size = 32 train_x, train_y, test_x, test_y = svhn.load_numpy( False, binarize_y=True) #norb.load_resized(size, binarize_y=True) extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True) x = { 'x': np.hstack((train_x, extra_x)), 'y': np.hstack((train_y, extra_y)) } ndict.shuffleCols(x) print 'Performing PCA, can take a few minutes... ', f_enc, f_dec, pca_params = pp.PCA(x['x'][:, :10000], cutoff=600, toFloat=True) ndict.savez(pca_params, logdir + 'pca_params') print 'Done.' 
n_y = 10 x = {'x': f_enc(x['x']).astype(np.float32)} x_valid = {'x': f_enc(test_x).astype(np.float32)} L_valid = 1 n_x = x['x'].shape[0] dim_input = (size, size) n_batch = 5000 colorImg = True bernoulli_x = False byteToFloat = False type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' # Construct model from anglepy.models import GPUVAE_Z_X updates = get_adam_optimizer(learning_rate=3e-4, weight_decay=weight_decay) model = GPUVAE_Z_X(updates, n_x, n_hidden, n_z, n_hidden[::-1], nonlinear, nonlinear, type_px, type_qz=type_qz, type_pz=type_pz, prior_sd=100, init_sd=1e-3) if False: #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412689061/' #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412676966/' #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412695481/' #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412695455/' #dir = '/Users/dpkingma/results/gpulearn_z_x_svhn_pca_300-(500, 500)__1413904756/' dir = '/home/ubuntu/results/gpulearn_z_x_mnist_50-[500, 500]__1414259423/' w = ndict.loadz(dir + 'w_best.ndict.tar.gz') v = ndict.loadz(dir + 'v_best.ndict.tar.gz') ndict.set_value(model.w, w) ndict.set_value(model.v, v) # Some statistics for optimization ll_valid_stats = [-1e99, 0] # Progress hook def hook(epoch, t, ll): if epoch % 10 != 0: return ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat) # Log ndict.savez(ndict.get_value(model.v), logdir + 'v') ndict.savez(ndict.get_value(model.w), logdir + 'w') if ll_valid > ll_valid_stats[0]: ll_valid_stats[0] = ll_valid ll_valid_stats[1] = 0 ndict.savez(ndict.get_value(model.v), logdir + 'v_best') ndict.savez(ndict.get_value(model.w), logdir + 'w_best') else: ll_valid_stats[1] += 1 # Stop when not improving validation set performance in 100 iterations if ll_valid_stats[1] > 1000: print "Finished" with open(logdir + 'hook.txt', 'a') as f: print >> f, "Finished" exit() print epoch, t, ll, ll_valid, ll_valid_stats with open(logdir + 'hook.txt', 'a') as f: print >> f, epoch, t, ll, ll_valid, ll_valid_stats # Graphics if gfx and epoch % gfx_freq == 0: #tail = '.png' tail = '-' + str(epoch) + '.png' v = {i: model.v[i].get_value() for i in model.v} w = {i: model.w[i].get_value() for i in model.w} if 'pca' not in dataset and 'random' not in dataset and 'normalized' not in dataset: if 'w0' in v: image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg) image.save(logdir + 'q_w0' + tail, 'PNG') image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg) image.save(logdir + 'out_w' + tail, 'PNG') if 'out_unif' in w: image = paramgraphics.mat_to_img(f_dec( w['out_unif'].reshape((-1, 1))), dim_input, True, colorImg=colorImg) image.save(logdir + 'out_unif' + tail, 'PNG') if n_z == 2: n_width = 10 import scipy.stats z = {'z': np.zeros((2, n_width**2))} for i in range(0, n_width): for j in range(0, n_width): z['z'][0, n_width * i + j] = scipy.stats.norm.ppf( float(i) / n_width + 0.5 / n_width) z['z'][1, n_width * i + j] = scipy.stats.norm.ppf( float(j) / n_width + 0.5 / n_width) x, _, _z = model.gen_xz({}, z, n_width**2) if dataset == 'mnist': x = 1 - _z['x'] image = paramgraphics.mat_to_img(f_dec(_z['x']), dim_input) image.save(logdir + '2dmanifold' + tail, 'PNG') else: _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144) x_samples = _z_confab['x'] image = 
paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg) image.save(logdir + 'samples' + tail, 'PNG') #x_samples = _x['x'] #image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg) #image.save(logdir+'samples2'+tail, 'PNG') else: # Model with preprocessing if 'w0' in v: image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg) image.save(logdir + 'q_w0' + tail, 'PNG') image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg) image.save(logdir + 'out_w' + tail, 'PNG') _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144) x_samples = f_dec(_z_confab['x']) x_samples = np.minimum(np.maximum(x_samples, 0), 1) image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg) image.save(logdir + 'samples' + tail, 'PNG') # Optimize #SFO dostep = epoch_vae_adam(model, x, n_batch=n_batch, bernoulli_x=bernoulli_x, byteToFloat=byteToFloat) loop_va(dostep, hook) pass
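# Example invocation (sketch; values are illustrative). Unlike the variant above, this
# main() reads no environment variables: it simply selects one of the dataset branches,
# applies the PCA/normalization preprocessing where the branch defines it, and trains
# the plain VAE with the Adam updates constructed above.
#
#   main(n_z=50, n_hidden=(500, 500), dataset='mnist', seed=0, comment='')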
def main(n_z, n_hidden, dataset, seed, gfx=True, _size=None): '''Learn a variational auto-encoder with generative model p(x,y,z)=p(y)p(z)p(x|y,z). x and y are (always) observed. I.e. this cannot be used for semi-supervised learning ''' assert (type(n_hidden) == tuple or type(n_hidden) == list) assert type(n_z) == int assert isinstance(dataset, basestring) print 'gpulearn_yz_x', n_z, n_hidden, dataset, seed comment = '' if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True: comment += 'prior-' if os.environ.has_key('default') and bool(int(os.environ['default'])) == True: comment += 'default-' else: comment += 'not_default-' import time logdir = 'results/gpulearn_yz_x_'+dataset+'_'+str(n_z)+'-'+str(n_hidden)+comment+'-'+str(int(time.time()))+'/' if not os.path.exists(logdir): os.makedirs(logdir) print 'logdir:', logdir np.random.seed(seed) # Init data if dataset == 'mnist': ''' What works well: 100-2-100 (Generated digits stay bit shady) 1000-2-1000 (Needs pretty long training) ''' import anglepy.data.mnist as mnist # MNIST size = 28 train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(size, binarize_y=True) f_enc, f_dec = lambda x:x, lambda x:x if os.environ.has_key('prior') and bool(int(os.environ['prior'])) == True: color.printBlue('Loading prior') mnist_prior = sio.loadmat('data/mnist_prior/mnist_prior.mat') train_mean_prior = mnist_prior['z_train'] valid_mean_prior = mnist_prior['z_valid'] else: train_mean_prior = np.zeros((n_z,train_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) x = {'x': train_x[:,:].astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': train_y[:,:].astype(np.float32)} x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32),'y': valid_y.astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 10 n_batch = 1000 colorImg = False bernoulli_x = True byteToFloat = False mosaic_w = 5 mosaic_h = 2 type_px = 'bernoulli' #print 'Network Structure:', n_z, elif dataset == 'mnist_basic': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_' tmp = sio.loadmat(data_dir+'train.mat') #color.printRed(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,10000:] valid_y = train_y[10000:] train_x = train_x[:,:10000] train_y = train_y[:10000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) ''' x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} ''' x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 
10 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 10000 n_valid = 2000 n_test = 50000 n_batch = 200 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'rectangle': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'rectangles_' tmp = sio.loadmat(data_dir+'train.mat') color.printRed(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,1000:] valid_y = train_y[1000:] train_x = train_x[:,:1000] train_y = train_y[:1000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) ''' x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} ''' x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 2 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 1000 n_valid = 200 n_test = 50000 n_batch = 500 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'convex': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'convex_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,6000:] valid_y = train_y[6000:] train_x = train_x[:,:6000] train_y = train_y[:6000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) ''' x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} ''' x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 2 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 
'softplus' type_px = 'bernoulli' n_train = 6000 n_valid = 2000 n_test = 50000 n_batch = 120 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'rectangle_image': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'rectangles_im_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,10000:] valid_y = train_y[10000:] train_x = train_x[:,:10000] train_y = train_y[:10000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) ''' x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} ''' x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 2 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 10000 n_valid = 2000 n_test = 50000 n_batch = 200 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'mnist_rot': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_all_rotation_normalized_float_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,10000:] valid_y = train_y[10000:] train_x = train_x[:,:10000] train_y = train_y[:10000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) ''' x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} ''' x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 10 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 10000 
n_valid = 2000 n_test = 50000 n_batch = 200 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'mnist_back_rand': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_background_random_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,10000:] valid_y = train_y[10000:] train_x = train_x[:,:10000] train_y = train_y[:10000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) ''' x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} ''' x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 10 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 10000 n_valid = 2000 n_test = 50000 n_batch = 200 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'mnist_back_image': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_background_images_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,10000:] valid_y = train_y[10000:] train_x = train_x[:,:10000] train_y = train_y[:10000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) ''' x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} ''' x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 10 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 10000 n_valid = 2000 n_test = 50000 n_batch = 200 
colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'mnist_back_image_rot': # MNIST size = 28 data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_all_background_images_rotation_normalized_' tmp = sio.loadmat(data_dir+'train.mat') train_x = tmp['x_train'].T train_y = tmp['t_train'].T.astype(np.int32) # validation 2000 valid_x = train_x[:,10000:] valid_y = train_y[10000:] train_x = train_x[:,:10000] train_y = train_y[:10000] tmp = sio.loadmat(data_dir+'test.mat') test_x = tmp['x_test'].T test_y = tmp['t_test'].T.astype(np.int32) print train_x.shape print train_y.shape print test_x.shape print test_y.shape f_enc, f_dec = pp.Identity() train_mean_prior = np.zeros((n_z,train_x.shape[1])) test_mean_prior = np.zeros((n_z,test_x.shape[1])) valid_mean_prior = np.zeros((n_z,valid_x.shape[1])) ''' x = {'x': train_x.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} ''' x = {'x': train_x.astype(np.float32), 'mean_prior': train_mean_prior.astype(np.float32), 'y': labelToMat(train_y).astype(np.float32)} x_train = x x_valid = {'x': valid_x.astype(np.float32), 'mean_prior': valid_mean_prior.astype(np.float32), 'y': labelToMat(valid_y).astype(np.float32)} x_test = {'x': test_x.astype(np.float32), 'mean_prior': test_mean_prior.astype(np.float32), 'y': labelToMat(test_y).astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 10 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 10000 n_valid = 2000 n_test = 50000 n_batch = 200 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train else: raise Exception('Unknown dataset.') # Init model n_hidden_q = n_hidden n_hidden_p = n_hidden from anglepy.models import GPUVAE_YZ_X if os.environ.has_key('default') and bool(int(os.environ['default'])) == True: updates = get_adam_optimizer(alpha=3e-4, beta1=0.9, beta2=0.999, weight_decay=0) else: updates = get_adam_optimizer(alpha=3e-4, beta1=0.1, beta2=0.001, weight_decay=1000.0/50000.0) model = GPUVAE_YZ_X(updates, n_x, n_y, n_hidden_q, n_z, n_hidden_p[::-1], 'softplus', 'softplus', type_px=type_px, type_qz='gaussianmarg', type_pz='gaussianmarg', prior_sd=1, uniform_y=True) if False: dir = '/home/ubuntu/results/gpulearn_yz_x_svhn_300-(500, 500)-1414094291/' dir = '/home/ubuntu/results/gpulearn_yz_x_svhn_300-(500, 500)-1414163488/' w = ndict.loadz(dir+'w_best.ndict.tar.gz') v = ndict.loadz(dir+'v_best.ndict.tar.gz') ndict.set_value(model.w, w) ndict.set_value(model.v, v) # Some statistics for optimization ll_valid_stats = [-1e99, 0] # Fixed sample for visualisation z_sample = {'z': np.repeat(np.random.standard_normal(size=(n_z, 12)), 12, axis=1).astype(np.float32)} y_sample = {'y': np.tile(np.random.multinomial(1, [1./n_y]*n_y, size=12).T, (1, 12))} # Progress hook def hook(epoch, t, ll): if epoch%10 != 0: return ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat) if math.isnan(ll_valid): print "NaN detected. 
Reverting to saved best parameters" ndict.set_value(model.v, ndict.loadz(logdir+'v.ndict.tar.gz')) ndict.set_value(model.w, ndict.loadz(logdir+'w.ndict.tar.gz')) return if ll_valid > ll_valid_stats[0]: ll_valid_stats[0] = ll_valid ll_valid_stats[1] = 0 ndict.savez(ndict.get_value(model.v), logdir+'v_best') ndict.savez(ndict.get_value(model.w), logdir+'w_best') else: ll_valid_stats[1] += 1 # Stop when not improving validation set performance in 100 iterations if False and ll_valid_stats[1] > 1000: print "Finished" with open(logdir+'hook.txt', 'a') as f: print >>f, "Finished" exit() # Log ndict.savez(ndict.get_value(model.v), logdir+'v') ndict.savez(ndict.get_value(model.w), logdir+'w') print epoch, t, ll, ll_valid with open(logdir+'hook.txt', 'a') as f: print >>f, t, ll, ll_valid if gfx: # Graphics v = {i: model.v[i].get_value() for i in model.v} w = {i: model.w[i].get_value() for i in model.w} tail = '-'+str(epoch)+'.png' image = paramgraphics.mat_to_img(f_dec(v['w0x'][:].T), dim_input, True, colorImg=colorImg) image.save(logdir+'q_w0x'+tail, 'PNG') image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg) image.save(logdir+'out_w'+tail, 'PNG') _x = {'y': np.random.multinomial(1, [1./n_y]*n_y, size=144).T} _, _, _z_confab = model.gen_xz(_x, {}, n_batch=144) image = paramgraphics.mat_to_img(f_dec(_z_confab['x']), dim_input, colorImg=colorImg) image.save(logdir+'samples'+tail, 'PNG') _, _, _z_confab = model.gen_xz(y_sample, z_sample, n_batch=144) image = paramgraphics.mat_to_img(f_dec(_z_confab['x']), dim_input, colorImg=colorImg) image.save(logdir+'samples_fixed'+tail, 'PNG') if n_z == 2: import ImageFont import ImageDraw n_width = 10 submosaic_offset = 15 submosaic_width = (dim_input[1]*n_width) submosaic_height = (dim_input[0]*n_width) mosaic = Image.new("RGB", (submosaic_width*mosaic_w, submosaic_offset+submosaic_height*mosaic_h)) for digit in range(0,n_y): if digit >= mosaic_h*mosaic_w: continue _x = {} n_batch_plot = n_width*n_width _x['y'] = np.zeros((n_y,n_batch_plot)) _x['y'][digit,:] = 1 _z = {'z':np.zeros((2,n_width**2))} for i in range(0,n_width): for j in range(0,n_width): _z['z'][0,n_width*i+j] = scipy.stats.norm.ppf(float(i)/n_width+0.5/n_width) _z['z'][1,n_width*i+j] = scipy.stats.norm.ppf(float(j)/n_width+0.5/n_width) _x, _, _z_confab = model.gen_xz(_x, _z, n_batch=n_batch_plot) x_samples = _z_confab['x'] image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg, tile_spacing=(0,0)) #image.save(logdir+'samples_digit_'+str(digit)+'_'+tail, 'PNG') mosaic_x = (digit%mosaic_w)*submosaic_width mosaic_y = submosaic_offset+int(digit/mosaic_w)*submosaic_height mosaic.paste(image, (mosaic_x, mosaic_y)) draw = ImageDraw.Draw(mosaic) draw.text((1,1),"Epoch #"+str(epoch)+" Loss="+str(int(ll))) #plt.savefig(logdir+'mosaic'+tail, format='PNG') mosaic.save(logdir+'mosaic'+tail, 'PNG') #x_samples = _x['x'] #image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg) #image.save(logdir+'samples2'+tail, 'PNG') # Optimize dostep = epoch_vae_adam(model, x, n_batch=n_batch, bernoulli_x=bernoulli_x, byteToFloat=byteToFloat) loop_va(dostep, hook) pass
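# The 2-D manifold / mosaic plots above place latent codes on a grid of equal-probability
# quantiles of the prior: cell (i, j) gets z = (ppf((i+0.5)/n_width), ppf((j+0.5)/n_width)),
# where ppf is the inverse CDF of N(0, 1), so each row and column covers an equal slice of
# prior mass rather than equal distance. A minimal standalone version of that grid
# construction (latent_grid is an illustrative helper, not part of the scripts above):
import numpy as np
import scipy.stats

def latent_grid(n_width=10):
    """Return a (2, n_width**2) array of 2-D latent points at equally spaced prior quantiles."""
    quantiles = (np.arange(n_width) + 0.5) / n_width      # (i + 0.5) / n_width
    marginal = scipy.stats.norm.ppf(quantiles)            # inverse CDF of the standard normal
    zi, zj = np.meshgrid(marginal, marginal, indexing='ij')
    return np.vstack([zi.ravel(), zj.ravel()])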
    # Compute prior probabilities per class
    train_y = mnist.binarize_labels(train_y)
    prior_y = train_y.mean(axis=1).reshape((10,1))

    # Create model
    n_x = 28*28
    n_y = 10
    n_z = 50
    n_hidden = 500,500
    updates = None
    model = GPUVAE_YZ_X(updates, n_x, n_y, n_hidden, n_z, n_hidden, 'softplus', 'softplus',
                        type_px='bernoulli', type_qz='gaussianmarg', type_pz='gaussianmarg',
                        prior_sd=1, uniform_y=True)

    # Load parameters
    dir = 'models/mnist_yz_x_50-500-500/'
    ndict.set_value(model.v, ndict.loadz(dir+'v_best.ndict.tar.gz'))
    ndict.set_value(model.w, ndict.loadz(dir+'w_best.ndict.tar.gz'))

else:
    raise Exception("Unknown dataset")

# Make predictions on test set
def get_lowerbound():
    lb = np.zeros((n_y, test_x.shape[1]))
    for _class in range(n_y):
        y = np.zeros((n_y, test_x.shape[1]))
        y[_class, :] = 1
        _lb = model.eval({'x': test_x.astype(np.float32), 'y': y.astype(np.float32)}, {})
        lb[_class, :] = _lb
    return lb
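# The script is truncated here, so what follows get_lowerbound is not shown; one plausible
# way to turn the per-class bounds into predictions (an assumption, not the original code)
# is to read lb[c, n] as an approximate log p(x_n, y=c), optionally add the log prior class
# frequencies computed above as prior_y, and take the best class per example:
def predict_from_lowerbound(lb, log_prior=None):
    """Return the argmax class per example from an (n_y, n_examples) score matrix."""
    scores = lb if log_prior is None else lb + log_prior
    return scores.argmax(axis=0)

# e.g. (hypothetical usage): y_pred = predict_from_lowerbound(get_lowerbound(), np.log(prior_y))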
colorImg = True
binarize = False

if True:
    if False:
        n_hidden = (500,500)
        n_z = 300
        dir = 'models/svhn_yz_x_300-500-500/'
    else:
        n_hidden = (1000,1000)
        n_z = 300
        dir = 'models/svhn_yz_x_300-1000-1000/'

    from anglepy.models import GPUVAE_YZ_X
    model = GPUVAE_YZ_X(None, n_x, n_y, n_hidden, n_z, n_hidden[::-1], nonlinear, nonlinear,
                        type_px, type_qz=type_qz, type_pz=type_pz, prior_sd=100, init_sd=1e-2)
    w = ndict.loadz(dir+'w_best.ndict.tar.gz')
    v = ndict.loadz(dir+'v_best.ndict.tar.gz')
    ndict.set_value(model.w, w)
    ndict.set_value(model.v, v)

    # PCA
    pca = ndict.loadz(dir+'pca_params.ndict.tar.gz')

    def f_dec(x):
        result = pca['eigvec'].dot(x * np.sqrt(pca['eigval'])) * pca['x_sd'] + pca['x_center']
        result = np.maximum(0, np.minimum(1, result))
        return result

if dataset == 'mnist':
    n_x = 28*28
    dim_input = (28,28)
    type_qz = 'gaussianmarg'
    type_pz = 'gaussianmarg'
    # Compute prior probabilities per class
    train_y = mnist.binarize_labels(train_y)
    prior_y = train_y.mean(axis=1).reshape((10,1))

    # Create model
    n_x = 28*28
    n_y = 10
    n_hidden = 500,500
    updates = None
    print 'n_z:', n_z
    model = GPUVAE_YZ_X(updates, n_x, n_y, n_hidden, n_z, n_hidden, 'softplus', 'softplus',
                        type_px='bernoulli', type_qz='gaussianmarg', type_pz='gaussianmarg',
                        prior_sd=1, uniform_y=True)

    # Load parameters
    ndict.set_value(model.v, ndict.loadz(dir+'v_best.ndict.tar.gz'))
    ndict.set_value(model.w, ndict.loadz(dir+'w_best.ndict.tar.gz'))

elif dataset == 'mnist_basic':
    data_dir = os.environ['ML_DATA_PATH']+'/mnist_variations/'+'mnist_'
    tmp = sio.loadmat(data_dir+'train.mat')
    #color.printRed(data_dir+'train.mat')
    train_x = tmp['x_train'].T
    train_y = tmp['t_train'].T.astype(np.int32)
    # validation 2000
    valid_x = train_x[:,10000:]
    valid_y = train_y[10000:]
    train_x = train_x[:,:10000]
    train_y = train_y[:10000]
    tmp = sio.loadmat(data_dir+'test.mat')
import numpy as np
from anglepy import ndict
import scipy.io as sio
import cPickle, gzip
import math
import os, sys

# load data, recognition model and generative model
print 'Loading data...'
dir = sys.argv[1]
p_type = sys.argv[5]
if p_type == 'null':
    p_type = ''
v = ndict.loadz(dir+'v'+p_type+'.ndict.tar.gz')
w = ndict.loadz(dir+'w'+p_type+'.ndict.tar.gz')

# perturb data
print 'Loading perturbed data...'
width = 28
height = 28
denoise_tpye = 1  # sample or mean
pertub_type = int(sys.argv[2])
pertub_prob = float(sys.argv[3])
denoise_times = int(sys.argv[4])  # denoising epoch
print pertub_type, pertub_prob, denoise_times
if pertub_type == 4:
type_px = 'gaussian'
nonlinear = 'softplus'
n_y = 10
n_batch_w = 10
colorImg = True
binarize = False

if True:
    n_hidden = (500,500)
    n_z = 300
    dir = 'models/svhn_yz_x_300-500-500/'
    from anglepy.models import GPUVAE_YZ_X
    model = GPUVAE_YZ_X(None, n_x, n_y, n_hidden, n_z, n_hidden[::-1], nonlinear, nonlinear,
                        type_px, type_qz=type_qz, type_pz=type_pz, prior_sd=100, init_sd=1e-2)
    w = ndict.loadz(dir+'w_best.ndict.tar.gz')
    v = ndict.loadz(dir+'v_best.ndict.tar.gz')
    ndict.set_value(model.w, w)
    ndict.set_value(model.v, v)
    # PCA
    f_enc, f_dec = pp.PCA_fromfile(dir+'pca_params.ndict.tar.gz')

if dataset == 'mnist':
    # MNIST
    import anglepy.data.mnist as mnist
    train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(size=28, binarize_y=True)
    f_enc, f_dec = lambda x: x, lambda x: x
    n_x = 28*28
    dim_input = (28,28)
def main(n_z, n_hidden, dataset, seed, comment, gfx=True): # Initialize logdir #--------------------- # Setasouto: # Create the directory to save the outputs files and log. #--------------------- import time logdir = 'results/gpulearn_z_x_' + dataset + '_' + str(n_z) + '-' + str( n_hidden) + '_' + comment + '_' + str(int(time.time())) + '/' if not os.path.exists(logdir): os.makedirs(logdir) print('logdir:', logdir) print('gpulearn_z_x', n_z, n_hidden, dataset, seed) with open(logdir + 'hook.txt', 'a') as f: print(f, 'learn_z_x', n_z, n_hidden, dataset, seed) np.random.seed(seed) gfx_freq = 1 weight_decay = 0 f_enc, f_dec = lambda x: x, lambda x: x # Init data if dataset == 'mnist': import anglepy.data.mnist as mnist # MNIST size = 28 train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy( size) x = {'x': train_x.astype(np.float32)} x_valid = {'x': valid_x.astype(np.float32)} x_test = {'x': test_x.astype(np.float32)} L_valid = 1 dim_input = (size, size) n_x = size * size type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 50000 n_batch = 1000 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch) / n_train if dataset == 'mnist_binarized': import anglepy.data.mnist_binarized as mnist_binarized # MNIST train_x, valid_x, test_x = mnist_binarized.load_numpy(28) x = {'x': np.hstack((train_x, valid_x)).astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 dim_input = (28, 28) n_x = 28 * 28 n_y = 10 type_qz = 'gaussianmarg' type_pz = 'mog' nonlinear = 'rectlin' type_px = 'bernoulli' n_train = 60000 n_batch = 1000 colorImg = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch) / n_train elif dataset == 'freyface': # Frey's face import anglepy.data.freyface as freyface n_train = 1600 train_x = freyface.load_numpy() np.random.shuffle(train_x) x = {'x': train_x.T[:, 0:n_train]} x_valid = {'x': train_x.T[:, n_train:]} L_valid = 1 dim_input = (28, 20) n_x = 20 * 28 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'bounded01' nonlinear = 'tanh' #tanh works better with freyface #'softplus' n_batch = 100 colorImg = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch) / n_train elif dataset == 'freyface_pca': # Frey's face import anglepy.data.freyface as freyface n_train = 1600 train_x = freyface.load_numpy().T np.random.shuffle(train_x.T) f_enc, f_dec, _ = pp.PCA(train_x, 0.99) train_x = f_enc(train_x) x = {'x': train_x[:, 0:n_train].astype(np.float32)} x_valid = {'x': train_x[:, n_train:].astype(np.float32)} L_valid = 1 dim_input = (28, 20) n_x = train_x.shape[0] type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 100 colorImg = False bernoulli_x = False byteToFloat = False elif dataset == 'freyface_bernoulli': # Frey's face import anglepy.data.freyface as freyface n_train = 1600 train_x = freyface.load_numpy().T np.random.shuffle(train_x.T) x = {'x': train_x[:, 0:n_train].astype(np.float32)} x_valid = {'x': train_x[:, n_train:].astype(np.float32)} L_valid = 1 dim_input = (28, 20) n_x = train_x.shape[0] type_pz = 'gaussianmarg' type_px = 'bernoulli' nonlinear = 'softplus' n_batch = 100 colorImg = False bernoulli_x = False byteToFloat = False elif dataset == 'norb': # small NORB dataset import anglepy.data.norb as norb size = 48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) x = {'x': train_x.astype(np.float32)} x_valid = {'x': 
test_x.astype(np.float32)} L_valid = 1 n_x = train_x.shape[0] dim_input = (size, size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 900 #23400/900 = 27 colorImg = False #binarize = False byteToFloat = False bernoulli_x = False weight_decay = float(n_batch) / train_x.shape[1] elif dataset == 'norb_pca': # small NORB dataset import anglepy.data.norb as norb size = 48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) f_enc, f_dec, _ = pp.PCA(train_x, 0.999) #f_enc, f_dec, _ = pp.normalize_random(train_x) train_x = f_enc(train_x) test_x = f_enc(test_x) x = {'x': train_x.astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 n_x = train_x.shape[0] dim_input = (size, size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 900 #23400/900 = 27 colorImg = False #binarize = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch) / train_x.shape[1] elif dataset == 'norb_normalized': # small NORB dataset import anglepy.data.norb as norb size = 48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) #f_enc, f_dec, _ = pp.PCA(train_x, 0.99) #f_enc, f_dec, _ = pp.normalize_random(train_x) f_enc, f_dec, _ = pp.normalize(train_x) train_x = f_enc(train_x) test_x = f_enc(test_x) x = {'x': train_x.astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 n_x = train_x.shape[0] dim_input = (size, size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 900 #23400/900 = 27 colorImg = False #binarize = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch) / train_x.shape[1] elif dataset == 'svhn': # SVHN dataset import anglepy.data.svhn as svhn size = 32 train_x, train_y, test_x, test_y = svhn.load_numpy( False, binarize_y=True) #norb.load_resized(size, binarize_y=True) extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True) x = { 'x': np.hstack((train_x, extra_x)), 'y': np.hstack((train_y, extra_y)) } ndict.shuffleCols(x) print('Performing PCA, can take a few minutes... ', f_enc, f_dec, pca_params=pp.PCA(x['x'][:, :10000], cutoff=600, toFloat=True)) ndict.savez(pca_params, logdir + 'pca_params') print('Done.') n_y = 10 x = {'x': f_enc(x['x']).astype(np.float32)} x_valid = {'x': f_enc(test_x).astype(np.float32)} L_valid = 1 n_x = x['x'].shape[0] dim_input = (size, size) n_batch = 5000 colorImg = True bernoulli_x = False byteToFloat = False type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' elif dataset == 'hyper': # Hyperspectral images: # Import 1 file of the dataset # TODO: import more files: Edit hyperspectralData.py #I added the hyperspectralData file in the anglepy library from hyperspectralData import HyperspectralData train_x, train_y, valid_x, valid_y, test_x, test_y = HyperspectralData( ).load_numpy(100000) #Dim input: How it has to be written like an image. We said that is: dim_input = (67, 4) n_x = train_x.shape[0] #Dimension of our data vector. 
x = {'x': train_x.astype(np.float32)} x_valid = {'x': valid_x.astype(np.float32)} x_test = {'x': test_x.astype(np.float32)} L_valid = 1 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = train_x.shape[1] n_batch = 1000 colorImg = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch) / n_train #Write the hyperparameters used: with open(logdir + 'AA_hyperparameters.txt', 'w') as file: file.write("L_valid: " + str(L_valid) + '\n') file.write("type_qz: " + type_qz + '\n') file.write("type_pz: " + type_pz + '\n') file.write("Nonlinear: " + nonlinear + '\n') file.write("type_px: " + type_px + '\n') file.write("n_train: " + str(n_train) + '\n') file.write("n_batch: " + str(n_batch) + '\n') file.write("colorImg: " + str(colorImg) + '\n') file.write("bernoulli_x: " + str(bernoulli_x) + '\n') file.write("byteToFloat: " + str(byteToFloat) + '\n') file.close() # Write the headers for the csv file output: with open(logdir + 'AA_results.txt', 'w') as file: # Like a csv file: file.write("Step" + ',' + "TimeElapsed" + ',' + "LowerboundMinibatch" + ',' + "LowerboundValid" + ',' + "NumStepNotImproving" + '\n') file.close() # Construct model from anglepy.models import GPUVAE_Z_X updates = get_adam_optimizer(learning_rate=3e-4, weight_decay=weight_decay) model = GPUVAE_Z_X(updates, n_x, n_hidden, n_z, n_hidden[::-1], nonlinear, nonlinear, type_px, type_qz=type_qz, type_pz=type_pz, prior_sd=100, init_sd=1e-3) #--------------- # SetaSouto: # The [::-1] is to reverse the list. #--------------- if False: #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412689061/' #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412676966/' #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412695481/' #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412695455/' #dir = '/Users/dpkingma/results/gpulearn_z_x_svhn_pca_300-(500, 500)__1413904756/' dir = '/home/ubuntu/results/gpulearn_z_x_mnist_50-[500, 500]__1414259423/' w = ndict.loadz(dir + 'w_best.ndict.tar.gz') v = ndict.loadz(dir + 'v_best.ndict.tar.gz') ndict.set_value(model.w, w) ndict.set_value(model.v, v) # Some statistics for optimization ll_valid_stats = [-1e99, 0] # Progress hook def hook(epoch, t, ll): ''' Documented by SetaSouto, may contains errors. :epoch: Number of the current step. :t: Time elapsed from the beginning. :ll: Loglikelihood (?). 
''' if epoch % 10 != 0: return ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat) # Log ndict.savez(ndict.get_value(model.v), logdir + 'v') ndict.savez(ndict.get_value(model.w), logdir + 'w') if ll_valid > ll_valid_stats[0]: ll_valid_stats[0] = ll_valid ll_valid_stats[1] = 0 ndict.savez(ndict.get_value(model.v), logdir + 'v_best') ndict.savez(ndict.get_value(model.w), logdir + 'w_best') else: ll_valid_stats[1] += 1 # Stop when not improving validation set performance in 100 iterations if ll_valid_stats[1] > 100: print("Finished") with open(logdir + 'hook.txt', 'a') as f: print(f, "Finished") exit() # This will be showing the current results and write them in a file: with open(logdir + 'AA_results.txt', 'a') as file: # Like a csv file: file.write( str(epoch) + ',' + str(t) + ',' + str(ll) + ',' + str(ll_valid) + ',' + str(ll_valid_stats[1]) + '\n') file.close() print("-------------------------") print("Current results:") print(" ") print("Step:", epoch) print("Time elapsed:", t) print("Loglikelihood minibatch:", ll) print("Loglikelihood validSet:", ll_valid) print("N not improving:", ll_valid_stats[1]) #print(epoch, t, ll, ll_valid, ll_valid_stats) #This print the file where are written the stats. #with open(logdir+'hook.txt', 'a') as f: #print(f, epoch, t, ll, ll_valid, ll_valid_stats) # Graphics if gfx and epoch % gfx_freq == 0: #tail = '.png' tail = '-' + str(epoch) + '.png' v = {i: model.v[i].get_value() for i in model.v} w = {i: model.w[i].get_value() for i in model.w} if 'pca' not in dataset and 'random' not in dataset and 'normalized' not in dataset: if 'w0' in v: image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg) image.save(logdir + 'q_w0' + tail, 'PNG') image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg) image.save(logdir + 'out_w' + tail, 'PNG') if 'out_unif' in w: image = paramgraphics.mat_to_img(f_dec( w['out_unif'].reshape((-1, 1))), dim_input, True, colorImg=colorImg) image.save(logdir + 'out_unif' + tail, 'PNG') if n_z == 2: n_width = 10 import scipy.stats z = {'z': np.zeros((2, n_width**2))} for i in range(0, n_width): for j in range(0, n_width): z['z'][0, n_width * i + j] = scipy.stats.norm.ppf( float(i) / n_width + 0.5 / n_width) z['z'][1, n_width * i + j] = scipy.stats.norm.ppf( float(j) / n_width + 0.5 / n_width) x, _, _z = model.gen_xz({}, z, n_width**2) if dataset == 'mnist': x = 1 - _z['x'] image = paramgraphics.mat_to_img(f_dec(_z['x']), dim_input) image.save(logdir + '2dmanifold' + tail, 'PNG') else: _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144) x_samples = _z_confab['x'] image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg) image.save(logdir + 'samples' + tail, 'PNG') #x_samples = _x['x'] #image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg) #image.save(logdir+'samples2'+tail, 'PNG') else: # Model with preprocessing if 'w0' in v: image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg) image.save(logdir + 'q_w0' + tail, 'PNG') image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg) image.save(logdir + 'out_w' + tail, 'PNG') _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144) x_samples = f_dec(_z_confab['x']) x_samples = np.minimum(np.maximum(x_samples, 0), 1) image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg) image.save(logdir + 'samples' + tail, 'PNG') # Optimize #SFO dostep = 
epoch_vae_adam(model, x, n_batch=n_batch, bernoulli_x=bernoulli_x, byteToFloat=byteToFloat) loop_va(dostep, hook) pass
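# A note on the recurring weight_decay = float(n_batch) / n_train setting in the dataset
# branches above: get_adam_optimizer applies the decay per minibatch, so tying it to the
# batch fraction keeps the total amount of decay per epoch fixed regardless of batch size.
# Worked numbers for two of the configurations above:
#   MNIST:    n_batch = 1000, n_train = 50000  ->  weight_decay = 1000.0 / 50000 = 0.02
#   freyface: n_batch = 100,  n_train = 1600   ->  weight_decay = 100.0 / 1600   = 0.0625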
def main(n_z, n_hidden, dataset, seed, gfx=True, _size=None): '''Learn a variational auto-encoder with generative model p(x,y,z)=p(y)p(z)p(x|y,z). x and y are (always) observed. I.e. this cannot be used for semi-supervised learning ''' assert (type(n_hidden) == tuple or type(n_hidden) == list) assert type(n_z) == int assert isinstance(dataset, str) print('gpulearn_yz_x', n_z, n_hidden, dataset, seed) import time logdir = 'results/gpulearn_yz_x_' + dataset + '_' + str(n_z) + '-' + str( n_hidden) + '-' + str(int(time.time())) + '/' if not os.path.exists(logdir): os.makedirs(logdir) print('logdir:', logdir) np.random.seed(seed) # Init data if dataset == 'mnist': ''' What works well: 100-2-100 (Generated digits stay bit shady) 1000-2-1000 (Needs pretty long training) ''' import anglepy.data.mnist as mnist # MNIST size = 28 train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy( size, binarize_y=True) f_enc, f_dec = lambda x: x, lambda x: x x = { 'x': train_x[:, :].astype(np.float32), 'y': train_y[:, :].astype(np.float32) } x_valid = { 'x': valid_x.astype(np.float32), 'y': valid_y.astype(np.float32) } L_valid = 1 dim_input = (size, size) n_x = size * size n_y = 10 n_batch = 1000 colorImg = False bernoulli_x = True byteToFloat = False mosaic_w = 5 mosaic_h = 2 type_px = 'bernoulli' elif dataset == 'norb': # resized NORB dataset, reshuffled import anglepy.data.norb as norb size = _size #48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) _x = {'x': train_x, 'y': train_y} ndict.shuffleCols(_x) train_x = _x['x'] train_y = _x['y'] # Do PCA f_enc, f_dec, pca_params = pp.PCA(_x['x'][:, :10000], cutoff=2000, toFloat=False) ndict.savez(pca_params, logdir + 'pca_params') x = { 'x': f_enc(train_x).astype(np.float32), 'y': train_y.astype(np.float32) } x_valid = { 'x': f_enc(test_x).astype(np.float32), 'y': test_y.astype(np.float32) } L_valid = 1 n_x = x['x'].shape[0] n_y = 5 dim_input = (size, size) n_batch = 1000 #23400/900 = 27 colorImg = False bernoulli_x = False byteToFloat = False mosaic_w = 5 mosaic_h = 1 type_px = 'gaussian' elif dataset == 'norb_instances': # resized NORB dataset with the instances as classes import anglepy.data.norb2 as norb2 size = _size #48 x, y = norb2.load_numpy_subclasses(size, binarize_y=True) _x = {'x': x, 'y': y} ndict.shuffleCols(_x) # Do pre=processing if True: # Works f_enc, f_dec, pca_params = pp.PCA(_x['x'][:, :10000], cutoff=600, global_sd=True, toFloat=True) ndict.savez(pca_params, logdir + 'pca_params') elif False: # Doesn't work f_enc, f_dec, pp_params = pp.normalize_noise(_x['x'][:, :50000], noise_sd=0.01, global_sd=True, toFloat=True) else: # Doesn't work f_enc, f_dec, params = pp.normalize_random(x=x[:, :10000], global_sd=True, toFloat=True) ndict.savez(params, logdir + 'normalize_random_params') n_valid = 5000 x = { 'x': f_enc(_x['x'][:, :-n_valid]).astype(np.float32), 'y': _x['y'][:, :-n_valid].astype(np.float32) } x_valid = { 'x': f_enc(_x['x'][:, :n_valid]).astype(np.float32), 'y': _x['y'][:, :n_valid].astype(np.float32) } L_valid = 1 n_x = x['x'].shape[0] n_y = 50 dim_input = (size, size) n_batch = 5000 #23400/900 = 27 colorImg = False bernoulli_x = False byteToFloat = False mosaic_w = 5 mosaic_h = 1 type_px = 'gaussian' elif dataset == 'svhn': # SVHN dataset import anglepy.data.svhn as svhn size = 32 train_x, train_y, test_x, test_y = svhn.load_numpy( False, binarize_y=True) #norb.load_resized(size, binarize_y=True) extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True) x = { 'x': np.hstack((train_x, 
extra_x)), 'y': np.hstack((train_y, extra_y)) } ndict.shuffleCols(x) #f_enc, f_dec, (x_sd, x_mean) = pp.preprocess_normalize01(train_x, True) f_enc, f_dec, pca_params = pp.PCA(x['x'][:, :10000], cutoff=1000, toFloat=True) ndict.savez(pca_params, logdir + 'pca_params') n_y = 10 x = { 'x': f_enc(x['x']).astype(np.float32), 'y': x['y'].astype(np.float32) } x_valid = { 'x': f_enc(test_x).astype(np.float32), 'y': test_y.astype(np.float32) } L_valid = 1 n_x = x['x'].shape[0] dim_input = (size, size) n_batch = 5000 colorImg = True bernoulli_x = False byteToFloat = False mosaic_w = 5 mosaic_h = 2 type_px = 'gaussian' # Init model n_hidden_q = n_hidden n_hidden_p = n_hidden from anglepy.models import GPUVAE_YZ_X updates = get_adam_optimizer(alpha=3e-4, beta1=0.9, beta2=0.999, weight_decay=0) model = GPUVAE_YZ_X(updates, n_x, n_y, n_hidden_q, n_z, n_hidden_p[::-1], 'softplus', 'softplus', type_px=type_px, type_qz='gaussianmarg', type_pz='gaussianmarg', prior_sd=1, uniform_y=True) if False: dir = '/home/ubuntu/results/gpulearn_yz_x_svhn_300-(500, 500)-1414094291/' dir = '/home/ubuntu/results/gpulearn_yz_x_svhn_300-(500, 500)-1414163488/' w = ndict.loadz(dir + 'w_best.ndict.tar.gz') v = ndict.loadz(dir + 'v_best.ndict.tar.gz') ndict.set_value(model.w, w) ndict.set_value(model.v, v) # Some statistics for optimization ll_valid_stats = [-1e99, 0] # Fixed sample for visualisation z_sample = { 'z': np.repeat(np.random.standard_normal(size=(n_z, 12)), 12, axis=1).astype(np.float32) } y_sample = { 'y': np.tile( np.random.multinomial(1, [1. / n_y] * n_y, size=12).T, (1, 12)) } # Progress hook def hook(epoch, t, ll): if epoch % 10 != 0: return ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat) if math.isnan(ll_valid): print("NaN detected. Reverting to saved best parameters") ndict.set_value(model.v, ndict.loadz(logdir + 'v.ndict.tar.gz')) ndict.set_value(model.w, ndict.loadz(logdir + 'w.ndict.tar.gz')) return if ll_valid > ll_valid_stats[0]: ll_valid_stats[0] = ll_valid ll_valid_stats[1] = 0 ndict.savez(ndict.get_value(model.v), logdir + 'v_best') ndict.savez(ndict.get_value(model.w), logdir + 'w_best') else: ll_valid_stats[1] += 1 # Stop when not improving validation set performance in 100 iterations if False and ll_valid_stats[1] > 1000: print("Finished") with open(logdir + 'hook.txt', 'a') as f: print("Finished", file=f) exit() # Log ndict.savez(ndict.get_value(model.v), logdir + 'v') ndict.savez(ndict.get_value(model.w), logdir + 'w') print(epoch, t, ll, ll_valid) with open(logdir + 'hook.txt', 'a') as f: print(t, ll, ll_valid, file=f) if gfx: # Graphics v = {i: model.v[i].get_value() for i in model.v} w = {i: model.w[i].get_value() for i in model.w} tail = '-' + str(epoch) + '.png' image = paramgraphics.mat_to_img(f_dec(v['w0x'][:].T), dim_input, True, colorImg=colorImg) image.save(logdir + 'q_w0x' + tail, 'PNG') image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg) image.save(logdir + 'out_w' + tail, 'PNG') _x = {'y': np.random.multinomial(1, [1. 
/ n_y] * n_y, size=144).T} _, _, _z_confab = model.gen_xz(_x, {}, n_batch=144) image = paramgraphics.mat_to_img(f_dec(_z_confab['x']), dim_input, colorImg=colorImg) image.save(logdir + 'samples' + tail, 'PNG') _, _, _z_confab = model.gen_xz(y_sample, z_sample, n_batch=144) image = paramgraphics.mat_to_img(f_dec(_z_confab['x']), dim_input, colorImg=colorImg) image.save(logdir + 'samples_fixed' + tail, 'PNG') if n_z == 2: import Image import ImageFont import ImageDraw n_width = 10 submosaic_offset = 15 submosaic_width = (dim_input[1] * n_width) submosaic_height = (dim_input[0] * n_width) mosaic = Image.new( "RGB", (submosaic_width * mosaic_w, submosaic_offset + submosaic_height * mosaic_h)) for digit in range(0, n_y): if digit >= mosaic_h * mosaic_w: continue _x = {} n_batch_plot = n_width * n_width _x['y'] = np.zeros((n_y, n_batch_plot)) _x['y'][digit, :] = 1 _z = {'z': np.zeros((2, n_width**2))} for i in range(0, n_width): for j in range(0, n_width): _z['z'][0, n_width * i + j] = scipy.stats.norm.ppf( float(i) / n_width + 0.5 / n_width) _z['z'][1, n_width * i + j] = scipy.stats.norm.ppf( float(j) / n_width + 0.5 / n_width) _x, _, _z_confab = model.gen_xz(_x, _z, n_batch=n_batch_plot) x_samples = _z_confab['x'] image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg, tile_spacing=(0, 0)) #image.save(logdir+'samples_digit_'+str(digit)+'_'+tail, 'PNG') mosaic_x = (digit % mosaic_w) * submosaic_width mosaic_y = submosaic_offset + int( digit / mosaic_w) * submosaic_height mosaic.paste(image, (mosaic_x, mosaic_y)) draw = ImageDraw.Draw(mosaic) draw.text((1, 1), "Epoch #" + str(epoch) + " Loss=" + str(int(ll))) #plt.savefig(logdir+'mosaic'+tail, format='PNG') mosaic.save(logdir + 'mosaic' + tail, 'PNG') #x_samples = _x['x'] #image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg) #image.save(logdir+'samples2'+tail, 'PNG') # Optimize dostep = epoch_vae_adam(model, x, n_batch=n_batch, bernoulli_x=bernoulli_x, byteToFloat=byteToFloat) loop_va(dostep, hook) pass
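# A note on the fixed visualisation batch above: z_sample repeats each of 12 random latent
# vectors 12 times along the column axis, while y_sample tiles one draw of 12 one-hot labels
# 12 times, so column k of the 144-sample batch pairs z_{k // 12} with y_{k % 12}. Every run
# of 12 consecutive samples therefore shares a latent code and sweeps through the same label
# draws, which keeps the 'samples_fixed' grids comparable across epochs. A minimal
# reconstruction of that pairing (fixed_visualisation_batch is an illustrative helper):
import numpy as np

def fixed_visualisation_batch(n_z, n_y):
    z12 = np.random.standard_normal(size=(n_z, 12)).astype(np.float32)
    y12 = np.random.multinomial(1, [1. / n_y] * n_y, size=12).T   # (n_y, 12) one-hot columns
    z = np.repeat(z12, 12, axis=1)                                # column k -> z12[:, k // 12]
    y = np.tile(y12, (1, 12))                                     # column k -> y12[:, k % 12]
    return {'z': z}, {'y': y}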
import numpy as np
import anglepy.ndict as ndict

# Path to the results directory from the M1 training:
path = "results/hyper_50-(500, 500)_longrun/"

# Load the parameters that were trained previously:
l1_v = ndict.loadz(path + 'v_best.ndict.tar.gz')

# Number of hidden nodes in the model:
n_h = (500, 500)

# Create the M1:
from anglepy.models.VAE_Z_X import VAE_Z_X

# We have to use the same hyperparameters as in training:
l1_model = VAE_Z_X(n_x=67 * 4, n_hidden_q=n_h, n_z=50, n_hidden_p=n_h,
                   nonlinear_q='softplus', nonlinear_p='softplus', type_px='bernoulli',
                   type_qz='gaussianmarg', type_pz='gaussianmarg', prior_sd=1)

# Now load the dataset that we want to use.
from hyperspectralData import HyperspectralData

nsamples = 100
train_x, train_y, valid_x, valid_y, test_x, test_y = HyperspectralData().load_numpy(nsamples)
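# The M1 loaded above was built with n_x = 67 * 4 = 268 inputs, matching the dim_input = (67, 4)
# used for the hyperspectral training run, so the freshly loaded data must have the same leading
# dimension before it can be pushed through the recognition model. A cheap sanity check
# (not in the original script):
assert train_x.shape[0] == 67 * 4, 'hyperspectral vectors must match the n_x the M1 was trained with'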
def PCA_fromfile(fname, toFloat=False):
    pca = ndict.loadz(fname)
    return PCA_encdec(pca['eigvec'], pca['eigval'], pca['x_center'], pca['x_sd'], toFloat)
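# PCA_encdec itself is not shown in this file. Elsewhere in these scripts the hand-written
# decoder reconstructs data as eigvec.dot(z * sqrt(eigval)) * x_sd + x_center, so a matching
# whitening encoder would plausibly look like the pair below. This is a sketch of that
# convention (make_pca_encdec is an illustrative name), not the library implementation.
import numpy as np

def make_pca_encdec(eigvec, eigval, x_center, x_sd):
    def f_enc(x):
        # centre and scale, project onto the principal directions, then whiten by sqrt(eigval)
        return eigvec.T.dot((x - x_center) / x_sd) / np.sqrt(eigval)

    def f_dec(z):
        # inverse of f_enc: un-whiten, project back, then undo the scaling and centring
        return eigvec.dot(z * np.sqrt(eigval)) * x_sd + x_center

    return f_enc, f_dec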
def main(n_z, n_hidden, dataset, seed, gfx=True, _size=None): '''Learn a variational auto-encoder with generative model p(x,y,z)=p(y)p(z)p(x|y,z). x and y are (always) observed. I.e. this cannot be used for semi-supervised learning ''' assert (type(n_hidden) == tuple or type(n_hidden) == list) assert type(n_z) == int assert isinstance(dataset, basestring) print 'gpulearn_yz_x', n_z, n_hidden, dataset, seed import time logdir = 'results/gpulearn_yz_x_'+dataset+'_'+str(n_z)+'-'+str(n_hidden)+'-'+str(int(time.time()))+'/' if not os.path.exists(logdir): os.makedirs(logdir) print 'logdir:', logdir np.random.seed(seed) # Init data if dataset == 'mnist': ''' What works well: 100-2-100 (Generated digits stay bit shady) 1000-2-1000 (Needs pretty long training) ''' import anglepy.data.mnist as mnist # MNIST size = 28 train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(size, binarize_y=True) f_enc, f_dec = lambda x:x, lambda x:x x = {'x': train_x[:,:].astype(np.float32), 'y': train_y[:,:].astype(np.float32)} x_valid = {'x': valid_x.astype(np.float32), 'y': valid_y.astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size n_y = 10 n_batch = 1000 colorImg = False bernoulli_x = True byteToFloat = False mosaic_w = 5 mosaic_h = 2 type_px = 'bernoulli' elif dataset == 'norb': # resized NORB dataset, reshuffled import anglepy.data.norb as norb size = _size #48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) _x = {'x': train_x, 'y': train_y} ndict.shuffleCols(_x) train_x = _x['x'] train_y = _x['y'] # Do PCA f_enc, f_dec, pca_params = pp.PCA(_x['x'][:,:10000], cutoff=2000, toFloat=False) ndict.savez(pca_params, logdir+'pca_params') x = {'x': f_enc(train_x).astype(np.float32), 'y':train_y.astype(np.float32)} x_valid = {'x': f_enc(test_x).astype(np.float32), 'y':test_y.astype(np.float32)} L_valid = 1 n_x = x['x'].shape[0] n_y = 5 dim_input = (size,size) n_batch = 1000 #23400/900 = 27 colorImg = False bernoulli_x = False byteToFloat = False mosaic_w = 5 mosaic_h = 1 type_px = 'gaussian' elif dataset == 'norb_instances': # resized NORB dataset with the instances as classes import anglepy.data.norb2 as norb2 size = _size #48 x, y = norb2.load_numpy_subclasses(size, binarize_y=True) _x = {'x': x, 'y': y} ndict.shuffleCols(_x) # Do pre=processing if True: # Works f_enc, f_dec, pca_params = pp.PCA(_x['x'][:,:10000], cutoff=600, global_sd=True, toFloat=True) ndict.savez(pca_params, logdir+'pca_params') elif False: # Doesn't work f_enc, f_dec, pp_params = pp.normalize_noise(_x['x'][:,:50000], noise_sd=0.01, global_sd=True, toFloat=True) else: # Doesn't work f_enc, f_dec, params = pp.normalize_random(x=x[:,:10000], global_sd=True, toFloat=True) ndict.savez(params, logdir+'normalize_random_params') n_valid = 5000 x = {'x': f_enc(_x['x'][:,:-n_valid]).astype(np.float32), 'y':_x['y'][:,:-n_valid].astype(np.float32)} x_valid = {'x': f_enc(_x['x'][:,:n_valid]).astype(np.float32), 'y':_x['y'][:,:n_valid].astype(np.float32)} L_valid = 1 n_x = x['x'].shape[0] n_y = 50 dim_input = (size,size) n_batch = 5000 #23400/900 = 27 colorImg = False bernoulli_x = False byteToFloat = False mosaic_w = 5 mosaic_h = 1 type_px = 'gaussian' elif dataset == 'svhn': # SVHN dataset import anglepy.data.svhn as svhn size = 32 train_x, train_y, test_x, test_y = svhn.load_numpy(False, binarize_y=True) #norb.load_resized(size, binarize_y=True) extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True) x = {'x': np.hstack((train_x, extra_x)), 'y':np.hstack((train_y, extra_y))} 
ndict.shuffleCols(x) #f_enc, f_dec, (x_sd, x_mean) = pp.preprocess_normalize01(train_x, True) f_enc, f_dec, pca_params = pp.PCA(x['x'][:,:10000], cutoff=1000, toFloat=True) ndict.savez(pca_params, logdir+'pca_params') n_y = 10 x = {'x': f_enc(x['x']).astype(np.float32), 'y': x['y'].astype(np.float32)} x_valid = {'x': f_enc(test_x).astype(np.float32), 'y': test_y.astype(np.float32)} L_valid = 1 n_x = x['x'].shape[0] dim_input = (size,size) n_batch = 5000 colorImg = True bernoulli_x = False byteToFloat = False mosaic_w = 5 mosaic_h = 2 type_px = 'gaussian' # Init model n_hidden_q = n_hidden n_hidden_p = n_hidden from anglepy.models import GPUVAE_YZ_X updates = get_adam_optimizer(alpha=3e-4, beta1=0.9, beta2=0.999, weight_decay=0) model = GPUVAE_YZ_X(updates, n_x, n_y, n_hidden_q, n_z, n_hidden_p[::-1], 'softplus', 'softplus', type_px=type_px, type_qz='gaussianmarg', type_pz='gaussianmarg', prior_sd=1, uniform_y=True) if False: dir = '/home/ubuntu/results/gpulearn_yz_x_svhn_300-(500, 500)-1414094291/' dir = '/home/ubuntu/results/gpulearn_yz_x_svhn_300-(500, 500)-1414163488/' w = ndict.loadz(dir+'w_best.ndict.tar.gz') v = ndict.loadz(dir+'v_best.ndict.tar.gz') ndict.set_value(model.w, w) ndict.set_value(model.v, v) # Some statistics for optimization ll_valid_stats = [-1e99, 0] # Fixed sample for visualisation z_sample = {'z': np.repeat(np.random.standard_normal(size=(n_z, 12)), 12, axis=1).astype(np.float32)} y_sample = {'y': np.tile(np.random.multinomial(1, [1./n_y]*n_y, size=12).T, (1, 12))} # Progress hook def hook(epoch, t, ll): if epoch%10 != 0: return ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat) if math.isnan(ll_valid): print "NaN detected. Reverting to saved best parameters" ndict.set_value(model.v, ndict.loadz(logdir+'v.ndict.tar.gz')) ndict.set_value(model.w, ndict.loadz(logdir+'w.ndict.tar.gz')) return if ll_valid > ll_valid_stats[0]: ll_valid_stats[0] = ll_valid ll_valid_stats[1] = 0 ndict.savez(ndict.get_value(model.v), logdir+'v_best') ndict.savez(ndict.get_value(model.w), logdir+'w_best') else: ll_valid_stats[1] += 1 # Stop when not improving validation set performance in 100 iterations if False and ll_valid_stats[1] > 1000: print "Finished" with open(logdir+'hook.txt', 'a') as f: print >>f, "Finished" exit() # Log ndict.savez(ndict.get_value(model.v), logdir+'v') ndict.savez(ndict.get_value(model.w), logdir+'w') print epoch, t, ll, ll_valid with open(logdir+'hook.txt', 'a') as f: print >>f, t, ll, ll_valid if gfx: # Graphics v = {i: model.v[i].get_value() for i in model.v} w = {i: model.w[i].get_value() for i in model.w} tail = '-'+str(epoch)+'.png' image = paramgraphics.mat_to_img(f_dec(v['w0x'][:].T), dim_input, True, colorImg=colorImg) image.save(logdir+'q_w0x'+tail, 'PNG') image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg) image.save(logdir+'out_w'+tail, 'PNG') _x = {'y': np.random.multinomial(1, [1./n_y]*n_y, size=144).T} _, _, _z_confab = model.gen_xz(_x, {}, n_batch=144) image = paramgraphics.mat_to_img(f_dec(_z_confab['x']), dim_input, colorImg=colorImg) image.save(logdir+'samples'+tail, 'PNG') _, _, _z_confab = model.gen_xz(y_sample, z_sample, n_batch=144) image = paramgraphics.mat_to_img(f_dec(_z_confab['x']), dim_input, colorImg=colorImg) image.save(logdir+'samples_fixed'+tail, 'PNG') if n_z == 2: import ImageFont import ImageDraw n_width = 10 submosaic_offset = 15 submosaic_width = (dim_input[1]*n_width) submosaic_height = (dim_input[0]*n_width) mosaic = Image.new("RGB", 
(submosaic_width*mosaic_w, submosaic_offset+submosaic_height*mosaic_h)) for digit in range(0,n_y): if digit >= mosaic_h*mosaic_w: continue _x = {} n_batch_plot = n_width*n_width _x['y'] = np.zeros((n_y,n_batch_plot)) _x['y'][digit,:] = 1 _z = {'z':np.zeros((2,n_width**2))} for i in range(0,n_width): for j in range(0,n_width): _z['z'][0,n_width*i+j] = scipy.stats.norm.ppf(float(i)/n_width+0.5/n_width) _z['z'][1,n_width*i+j] = scipy.stats.norm.ppf(float(j)/n_width+0.5/n_width) _x, _, _z_confab = model.gen_xz(_x, _z, n_batch=n_batch_plot) x_samples = _z_confab['x'] image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg, tile_spacing=(0,0)) #image.save(logdir+'samples_digit_'+str(digit)+'_'+tail, 'PNG') mosaic_x = (digit%mosaic_w)*submosaic_width mosaic_y = submosaic_offset+int(digit/mosaic_w)*submosaic_height mosaic.paste(image, (mosaic_x, mosaic_y)) draw = ImageDraw.Draw(mosaic) draw.text((1,1),"Epoch #"+str(epoch)+" Loss="+str(int(ll))) #plt.savefig(logdir+'mosaic'+tail, format='PNG') mosaic.save(logdir+'mosaic'+tail, 'PNG') #x_samples = _x['x'] #image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg) #image.save(logdir+'samples2'+tail, 'PNG') # Optimize dostep = epoch_vae_adam(model, x, n_batch=n_batch, bernoulli_x=bernoulli_x, byteToFloat=byteToFloat) loop_va(dostep, hook) pass
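# A note on the mosaic geometry above: with the MNIST settings dim_input = (28, 28),
# n_width = 10, mosaic_w = 5 and mosaic_h = 2, each per-digit sub-mosaic is a 10 x 10 tiling
# of 28 x 28 samples with tile_spacing (0, 0), i.e. 280 x 280 pixels, and the full canvas is
#   width  = 280 * 5      = 1400 px
#   height = 15 + 280 * 2 =  575 px
# where the 15-pixel submosaic_offset leaves room for the "Epoch #... Loss=..." caption drawn
# at the top-left corner.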
def main(n_z, n_hidden, dataset, seed, comment, gfx=True): # Initialize logdir import time logdir = 'results/gpulearn_z_x_'+dataset+'_'+str(n_z)+'-'+str(n_hidden)+'_'+comment+'_'+str(int(time.time()))+'/' if not os.path.exists(logdir): os.makedirs(logdir) print 'logdir:', logdir print 'gpulearn_z_x', n_z, n_hidden, dataset, seed with open(logdir+'hook.txt', 'a') as f: print >>f, 'learn_z_x', n_z, n_hidden, dataset, seed np.random.seed(seed) gfx_freq = 1 weight_decay = 0 f_enc, f_dec = lambda x:x, lambda x:x # Init data if dataset == 'mnist': import anglepy.data.mnist as mnist # MNIST size = 28 train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy(size) x = {'x': train_x.astype(np.float32)} x_valid = {'x': valid_x.astype(np.float32)} x_test = {'x': test_x.astype(np.float32)} L_valid = 1 dim_input = (size,size) n_x = size*size type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' nonlinear = 'softplus' type_px = 'bernoulli' n_train = 50000 n_batch = 1000 colorImg = False bernoulli_x = True byteToFloat = False weight_decay = float(n_batch)/n_train if dataset == 'mnist_binarized': import anglepy.data.mnist_binarized as mnist_binarized # MNIST train_x, valid_x, test_x = mnist_binarized.load_numpy(28) x = {'x': np.hstack((train_x, valid_x)).astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 dim_input = (28,28) n_x = 28*28 n_y = 10 type_qz = 'gaussianmarg' type_pz = 'mog' nonlinear = 'rectlin' type_px = 'bernoulli' n_train = 60000 n_batch = 1000 colorImg = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'freyface': # Frey's face import anglepy.data.freyface as freyface n_train = 1600 train_x = freyface.load_numpy() np.random.shuffle(train_x) x = {'x': train_x.T[:,0:n_train]} x_valid = {'x': train_x.T[:,n_train:]} L_valid = 1 dim_input = (28,20) n_x = 20*28 type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'bounded01' nonlinear = 'tanh' #tanh works better with freyface #'softplus' n_batch = 100 colorImg = False bernoulli_x = False byteToFloat = False weight_decay = float(n_batch)/n_train elif dataset == 'freyface_pca': # Frey's face import anglepy.data.freyface as freyface n_train = 1600 train_x = freyface.load_numpy().T np.random.shuffle(train_x.T) f_enc, f_dec, _ = pp.PCA(train_x, 0.99) train_x = f_enc(train_x) x = {'x': train_x[:,0:n_train].astype(np.float32)} x_valid = {'x': train_x[:,n_train:].astype(np.float32)} L_valid = 1 dim_input = (28,20) n_x = train_x.shape[0] type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 100 colorImg = False bernoulli_x = False byteToFloat = False elif dataset == 'freyface_bernoulli': # Frey's face import anglepy.data.freyface as freyface n_train = 1600 train_x = freyface.load_numpy().T np.random.shuffle(train_x.T) x = {'x': train_x[:,0:n_train].astype(np.float32)} x_valid = {'x': train_x[:,n_train:].astype(np.float32)} L_valid = 1 dim_input = (28,20) n_x = train_x.shape[0] type_pz = 'gaussianmarg' type_px = 'bernoulli' nonlinear = 'softplus' n_batch = 100 colorImg = False bernoulli_x = False byteToFloat = False elif dataset == 'norb': # small NORB dataset import anglepy.data.norb as norb size = 48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) x = {'x': train_x.astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 n_x = train_x.shape[0] dim_input = (size,size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 
900 #23400/900 = 27 colorImg = False #binarize = False byteToFloat = False bernoulli_x = False weight_decay= float(n_batch)/train_x.shape[1] elif dataset == 'norb_pca': # small NORB dataset import anglepy.data.norb as norb size = 48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) f_enc, f_dec, _ = pp.PCA(train_x, 0.999) #f_enc, f_dec, _ = pp.normalize_random(train_x) train_x = f_enc(train_x) test_x = f_enc(test_x) x = {'x': train_x.astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 n_x = train_x.shape[0] dim_input = (size,size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 900 #23400/900 = 27 colorImg = False #binarize = False bernoulli_x = False byteToFloat = False weight_decay= float(n_batch)/train_x.shape[1] elif dataset == 'norb_normalized': # small NORB dataset import anglepy.data.norb as norb size = 48 train_x, train_y, test_x, test_y = norb.load_resized(size, binarize_y=True) #f_enc, f_dec, _ = pp.PCA(train_x, 0.99) #f_enc, f_dec, _ = pp.normalize_random(train_x) f_enc, f_dec, _ = pp.normalize(train_x) train_x = f_enc(train_x) test_x = f_enc(test_x) x = {'x': train_x.astype(np.float32)} x_valid = {'x': test_x.astype(np.float32)} L_valid = 1 n_x = train_x.shape[0] dim_input = (size,size) type_qz = 'gaussianmarg' type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' n_batch = 900 #23400/900 = 27 colorImg = False #binarize = False bernoulli_x = False byteToFloat = False weight_decay= float(n_batch)/train_x.shape[1] elif dataset == 'svhn': # SVHN dataset import anglepy.data.svhn as svhn size = 32 train_x, train_y, test_x, test_y = svhn.load_numpy(False, binarize_y=True) #norb.load_resized(size, binarize_y=True) extra_x, extra_y = svhn.load_numpy_extra(False, binarize_y=True) x = {'x': np.hstack((train_x, extra_x)), 'y':np.hstack((train_y, extra_y))} ndict.shuffleCols(x) print 'Performing PCA, can take a few minutes... ', f_enc, f_dec, pca_params = pp.PCA(x['x'][:,:10000], cutoff=600, toFloat=True) ndict.savez(pca_params, logdir+'pca_params') print 'Done.' 
        n_y = 10
        x = {'x': f_enc(x['x']).astype(np.float32)}
        x_valid = {'x': f_enc(test_x).astype(np.float32)}
        L_valid = 1
        n_x = x['x'].shape[0]
        dim_input = (size,size)
        n_batch = 5000
        colorImg = True
        bernoulli_x = False
        byteToFloat = False
        type_qz = 'gaussianmarg'
        type_pz = 'gaussianmarg'
        type_px = 'gaussian'
        nonlinear = 'softplus'

    # Construct model
    from anglepy.models import GPUVAE_Z_X
    updates = get_adam_optimizer(learning_rate=3e-4, weight_decay=weight_decay)
    model = GPUVAE_Z_X(updates, n_x, n_hidden, n_z, n_hidden[::-1], nonlinear, nonlinear, type_px, type_qz=type_qz, type_pz=type_pz, prior_sd=100, init_sd=1e-3)

    if False:
        #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412689061/'
        #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412676966/'
        #dir = '/Users/dpkingma/results/learn_z_x_svhn_bernoulli_300-(1000, 1000)_l1l2_sharing_and_1000HU_1412695481/'
        #dir = '/Users/dpkingma/results/learn_z_x_mnist_binarized_50-(500, 500)_mog_1412695455/'
        #dir = '/Users/dpkingma/results/gpulearn_z_x_svhn_pca_300-(500, 500)__1413904756/'
        dir = '/home/ubuntu/results/gpulearn_z_x_mnist_50-[500, 500]__1414259423/'
        w = ndict.loadz(dir+'w_best.ndict.tar.gz')
        v = ndict.loadz(dir+'v_best.ndict.tar.gz')
        ndict.set_value(model.w, w)
        ndict.set_value(model.v, v)

    # Some statistics for optimization
    ll_valid_stats = [-1e99, 0]

    # Progress hook
    def hook(epoch, t, ll):
        if epoch%10 != 0: return

        ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat)

        # Log
        ndict.savez(ndict.get_value(model.v), logdir+'v')
        ndict.savez(ndict.get_value(model.w), logdir+'w')

        if ll_valid > ll_valid_stats[0]:
            ll_valid_stats[0] = ll_valid
            ll_valid_stats[1] = 0
            ndict.savez(ndict.get_value(model.v), logdir+'v_best')
            ndict.savez(ndict.get_value(model.w), logdir+'w_best')
        else:
            ll_valid_stats[1] += 1
            # Stop when validation performance has not improved for 1000 checks
            if ll_valid_stats[1] > 1000:
                print "Finished"
                with open(logdir+'hook.txt', 'a') as f:
                    print >>f, "Finished"
                exit()

        print epoch, t, ll, ll_valid, ll_valid_stats
        with open(logdir+'hook.txt', 'a') as f:
            print >>f, epoch, t, ll, ll_valid, ll_valid_stats

        # Graphics
        if gfx and epoch%gfx_freq == 0:
            #tail = '.png'
            tail = '-'+str(epoch)+'.png'
            v = {i: model.v[i].get_value() for i in model.v}
            w = {i: model.w[i].get_value() for i in model.w}

            if 'pca' not in dataset and 'random' not in dataset and 'normalized' not in dataset:
                if 'w0' in v:
                    image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg)
                    image.save(logdir+'q_w0'+tail, 'PNG')
                image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg)
                image.save(logdir+'out_w'+tail, 'PNG')
                if 'out_unif' in w:
                    image = paramgraphics.mat_to_img(f_dec(w['out_unif'].reshape((-1,1))), dim_input, True, colorImg=colorImg)
                    image.save(logdir+'out_unif'+tail, 'PNG')
                if n_z == 2:
                    n_width = 10
                    import scipy.stats
                    z = {'z':np.zeros((2,n_width**2))}
                    for i in range(0,n_width):
                        for j in range(0,n_width):
                            z['z'][0,n_width*i+j] = scipy.stats.norm.ppf(float(i)/n_width+0.5/n_width)
                            z['z'][1,n_width*i+j] = scipy.stats.norm.ppf(float(j)/n_width+0.5/n_width)
                    x, _, _z = model.gen_xz({}, z, n_width**2)
                    if dataset == 'mnist':
                        x = 1 - _z['x']
                    image = paramgraphics.mat_to_img(f_dec(_z['x']), dim_input)
                    image.save(logdir+'2dmanifold'+tail, 'PNG')
                else:
                    _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144)
                    x_samples = _z_confab['x']
                    image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg)
                    image.save(logdir+'samples'+tail, 'PNG')
                    #x_samples = _x['x']
                    #image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg)
                    #image.save(logdir+'samples2'+tail, 'PNG')
            else:
                # Model with preprocessing
                if 'w0' in v:
                    image = paramgraphics.mat_to_img(f_dec(v['w0'][:].T), dim_input, True, colorImg=colorImg)
                    image.save(logdir+'q_w0'+tail, 'PNG')
                image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg)
                image.save(logdir+'out_w'+tail, 'PNG')
                _x, _, _z_confab = model.gen_xz({}, {}, n_batch=144)
                x_samples = f_dec(_z_confab['x'])
                x_samples = np.minimum(np.maximum(x_samples, 0), 1)
                image = paramgraphics.mat_to_img(x_samples, dim_input, colorImg=colorImg)
                image.save(logdir+'samples'+tail, 'PNG')

    # Optimize
    #SFO
    dostep = epoch_vae_adam(model, x, n_batch=n_batch, bernoulli_x=bernoulli_x, byteToFloat=byteToFloat)
    loop_va(dostep, hook)

    pass
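# --- Illustrative usage sketch (not part of the original script) ---
# A minimal, hypothetical way to invoke the training entry point above.
# The argument values are assumptions chosen to match the MNIST branch
# (50 latent units, two softplus hidden layers of 500 units); they are not
# settings taken from the original code.
if __name__ == '__main__':
    main(n_z=50, n_hidden=(500, 500), dataset='mnist', seed=0, comment='')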
def main(n_passes, n_labeled, n_z, n_hidden, dataset, seed, alpha, n_minibatches, comment): ''' Learn a variational auto-encoder with generative model p(x,y,z)=p(y)p(z)p(x|y,z) And where 'x' is always observed and 'y' is _sometimes_ observed (hence semi-supervised). We're going to use q(y|x) as a classification model. ''' import time logdir = 'results/learn_yz_x_ss_'+dataset+'_'+str(n_z)+'-'+str(n_hidden)+'_nlabeled'+str(n_labeled)+'_alpha'+str(alpha)+'_seed'+str(seed)+'_'+comment+'-'+str(int(time.time()))+'/' if not os.path.exists(logdir): os.makedirs(logdir) print 'logdir:', logdir print sys.argv[0], n_labeled, n_z, n_hidden, dataset, seed, comment np.random.seed(seed) # Init data if dataset == 'mnist_2layer': size = 28 dim_input = (size,size) # Load model for feature extraction path = 'models/mnist_z_x_50-500-500_longrun/' #'models/mnist_z_x_50-600-600/' l1_v = ndict.loadz(path+'v.ndict.tar.gz') l1_w = ndict.loadz(path+'w.ndict.tar.gz') n_h = (500,500) from anglepy.models.VAE_Z_X import VAE_Z_X l1_model = VAE_Z_X(n_x=28*28, n_hidden_q=n_h, n_z=50, n_hidden_p=n_h, nonlinear_q='softplus', nonlinear_p='softplus', type_px='bernoulli', type_qz='gaussianmarg', type_pz='gaussianmarg', prior_sd=1) # Load dataset import anglepy.data.mnist as mnist # load train and test sets train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy_split(size, binarize_y=True) # create labeled/unlabeled split in training set x_l, y_l, x_u, y_u = mnist.create_semisupervised(train_x, train_y, n_labeled) # Extract features # 1. Determine which dimensions to keep def transform(v, _x): return l1_model.dist_qz['z'](*([_x] + v.values() + [np.ones((1, _x.shape[1]))])) q_mean, _ = transform(l1_v, x_u[0:1000]) idx_keep = np.std(q_mean, axis=1) > 0.1 # 2. Select dimensions for key in ['mean_b','mean_w','logvar_b','logvar_w']: l1_v[key] = l1_v[key][idx_keep,:] l1_w['w0'] = l1_w['w0'][:,idx_keep] # 3. Extract features x_mean_u, x_logvar_u = transform(l1_v, x_u) x_mean_l, x_logvar_l = transform(l1_v, x_l) x_unlabeled = {'mean':x_mean_u, 'logvar':x_logvar_u, 'y':y_u} x_labeled = {'mean':x_mean_l, 'logvar':x_logvar_l, 'y':y_l} valid_x, _ = transform(l1_v, valid_x) test_x, _ = transform(l1_v, test_x) n_x = np.sum(idx_keep) n_y = 10 type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' colorImg = False if dataset == 'svhn_2layer': size = 32 dim_input = (size,size) # Load model for feature extraction path = 'models/tmp/svhn_z_x_300-500-500/' l1_v = ndict.loadz(path+'v.ndict.tar.gz') l1_w = ndict.loadz(path+'w.ndict.tar.gz') f_enc, f_dec = pp.PCA_fromfile(path+'pca_params.ndict.tar.gz', True) from anglepy.models.VAE_Z_X import VAE_Z_X n_x = l1_v['w0'].shape[1] #=600 l1_model = VAE_Z_X(n_x=n_x, n_hidden_q=(600,600), n_z=300, n_hidden_p=(600,600), nonlinear_q='softplus', nonlinear_p='softplus', type_px='gaussian', type_qz='gaussianmarg', type_pz='gaussianmarg', prior_sd=1) # SVHN dataset import anglepy.data.svhn as svhn size = 32 train_x, train_y, valid_x, valid_y, test_x, test_y = svhn.load_numpy_split(False, binarize_y=True, extra=False) #norb.load_resized(size, binarize_y=True) #train_x = np.hstack((_train_x, extra_x)) #train_y = np.hstack((_train_y, extra_y))[:,:604000] # create labeled/unlabeled split in training set import anglepy.data.mnist as mnist x_l, y_l, x_u, y_u = mnist.create_semisupervised(train_x, train_y, n_labeled) # Extract features # 1. Determine which dimensions to keep def transform(v, _x): return l1_model.dist_qz['z'](*([f_enc(_x)] + v.values() + [np.ones((1, _x.shape[1]))])) # 2. 
We're keeping all latent dimensions # 3. Extract features x_mean_u, x_logvar_u = transform(l1_v, x_u) x_mean_l, x_logvar_l = transform(l1_v, x_l) x_unlabeled = {'mean':x_mean_u, 'logvar':x_logvar_u, 'y':y_u} x_labeled = {'mean':x_mean_l, 'logvar':x_logvar_l, 'y':y_l} valid_x, _ = transform(l1_v, valid_x) test_x, _ = transform(l1_v, test_x) n_x = l1_w['w0'].shape[1] n_y = 10 type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' # Init VAE model p(x,y,z) from anglepy.models.VAE_YZ_X import VAE_YZ_X uniform_y = True model = VAE_YZ_X(n_x, n_y, n_hidden, n_z, n_hidden, nonlinear, nonlinear, type_px, type_qz="gaussianmarg", type_pz=type_pz, prior_sd=1, uniform_y=uniform_y) v, w = model.init_w(1e-3) # Init q(y|x) model from anglepy.models.MLP_Categorical import MLP_Categorical n_units = [n_x]+list(n_hidden)+[n_y] model_qy = MLP_Categorical(n_units=n_units, prior_sd=1, nonlinearity=nonlinear) u = model_qy.init_w(1e-3) # Just test if False: u = ndict.loadz('u.ndict.tar.gz') v = ndict.loadz('v.ndict.tar.gz') w = ndict.loadz('w.ndict.tar.gz') pass # Progress hook t0 = time.time() def hook(t, u, v, w, ll): # Get classification error of validation and test sets def error(dataset_x, dataset_y): _, _, _z = model_qy.gen_xz(u, {'x':dataset_x}, {}) return np.sum( np.argmax(_z['py'], axis=0) != np.argmax(dataset_y, axis=0)) / (0.0 + dataset_y.shape[1]) valid_error = error(valid_x, valid_y) test_error = error(test_x, test_y) # Log ndict.savez(u, logdir+'u') ndict.savez(v, logdir+'v') ndict.savez(w, logdir+'w') dt = time.time() - t0 print dt, t, ll, valid_error, test_error with open(logdir+'hook.txt', 'a') as f: print >>f, dt, t, ll, valid_error, test_error return valid_error # Optimize result = optim_vae_ss_adam(alpha, model_qy, model, x_labeled, x_unlabeled, n_y, u, v, w, n_minibatches=n_minibatches, n_passes=n_passes, hook=hook) return result
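# --- Illustrative usage sketch (not part of the original script) ---
# Hypothetical call of the semi-supervised entry point above; every value is
# an illustrative assumption (e.g. 100 labels, 50 latent units, alpha as the
# weight of the q(y|x) classification term), not a setting from the original
# code.
if __name__ == '__main__':
    main(n_passes=1000, n_labeled=100, n_z=50, n_hidden=(300,),
         dataset='mnist_2layer', seed=0, alpha=0.1, n_minibatches=100, comment='')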
import numpy as np
from anglepy import ndict
import scipy.io as sio
import cPickle, gzip
import math
import os, sys

# load data, recognition model and generative model
print 'Loading data...'
dir = sys.argv[1]
p_type = sys.argv[5]
if p_type == 'null':
    p_type = ''
v = ndict.loadz(dir + 'v' + p_type + '.ndict.tar.gz')
w = ndict.loadz(dir + 'w' + p_type + '.ndict.tar.gz')

# perturb data
print 'Loading perturbed data...'
width = 28
height = 28
denoise_tpye = 1  # sample or mean
pertub_type = int(sys.argv[2])
pertub_prob = float(sys.argv[3])
denoise_times = int(sys.argv[4])  # denoising epoch
print pertub_type, pertub_prob, denoise_times
if pertub_type == 4:
def hook(epoch, t, ll):
    if epoch%10 != 0: return

    ll_valid, _ = model.est_loglik(x_valid, n_samples=L_valid, n_batch=n_batch, byteToFloat=byteToFloat)
    if math.isnan(ll_valid):
        print "NaN detected. Reverting to saved best parameters"
        ndict.set_value(model.v, ndict.loadz(logdir+'v.ndict.tar.gz'))
        ndict.set_value(model.w, ndict.loadz(logdir+'w.ndict.tar.gz'))
        return

    if ll_valid > ll_valid_stats[0]:
        ll_valid_stats[0] = ll_valid
        ll_valid_stats[1] = 0
        ndict.savez(ndict.get_value(model.v), logdir+'v_best')
        ndict.savez(ndict.get_value(model.w), logdir+'w_best')
    else:
        ll_valid_stats[1] += 1
        # Early stopping (currently disabled by the 'False'): would stop after
        # 1000 validation checks without improvement
        if False and ll_valid_stats[1] > 1000:
            print "Finished"
            with open(logdir+'hook.txt', 'a') as f:
                print >>f, "Finished"
            exit()

    # Log
    ndict.savez(ndict.get_value(model.v), logdir+'v')
    ndict.savez(ndict.get_value(model.w), logdir+'w')

    print epoch, t, ll, ll_valid
    with open(logdir+'hook.txt', 'a') as f:
        print >>f, t, ll, ll_valid

    if gfx:
        # Graphics
        v = {i: model.v[i].get_value() for i in model.v}
        w = {i: model.w[i].get_value() for i in model.w}
        tail = '-'+str(epoch)+'.png'

        image = paramgraphics.mat_to_img(f_dec(v['w0x'][:].T), dim_input, True, colorImg=colorImg)
        image.save(logdir+'q_w0x'+tail, 'PNG')

        image = paramgraphics.mat_to_img(f_dec(w['out_w'][:]), dim_input, True, colorImg=colorImg)
        image.save(logdir+'out_w'+tail, 'PNG')

        _x = {'y': np.random.multinomial(1, [1./n_y]*n_y, size=144).T}
        _, _, _z_confab = model.gen_xz(_x, {}, n_batch=144)
        image = paramgraphics.mat_to_img(f_dec(_z_confab['x']), dim_input, colorImg=colorImg)
        image.save(logdir+'samples'+tail, 'PNG')

        _, _, _z_confab = model.gen_xz(y_sample, z_sample, n_batch=144)
        image = paramgraphics.mat_to_img(f_dec(_z_confab['x']), dim_input, colorImg=colorImg)
        image.save(logdir+'samples_fixed'+tail, 'PNG')

        if n_z == 2:
            import ImageFont
            import ImageDraw
            n_width = 10
            submosaic_offset = 15
            submosaic_width = (dim_input[1]*n_width)
            submosaic_height = (dim_input[0]*n_width)
            mosaic = Image.new("RGB", (submosaic_width*mosaic_w, submosaic_offset+submosaic_height*mosaic_h))
            for digit in range(0,n_y):
                if digit >= mosaic_h*mosaic_w:
                    continue
                _x = {}
                n_batch_plot = n_width*n_width
                _x['y'] = np.zeros((n_y,n_batch_plot))
                _x['y'][digit,:] = 1
                _z = {'z':np.zeros((2,n_width**2))}
                for i in range(0,n_width):
                    for j in range(0,n_width):
                        _z['z'][0,n_width*i+j] = scipy.stats.norm.ppf(float(i)/n_width+0.5/n_width)
                        _z['z'][1,n_width*i+j] = scipy.stats.norm.ppf(float(j)/n_width+0.5/n_width)
                _x, _, _z_confab = model.gen_xz(_x, _z, n_batch=n_batch_plot)
                x_samples = _z_confab['x']
                image = paramgraphics.mat_to_img(f_dec(x_samples), dim_input, colorImg=colorImg, tile_spacing=(0,0))
                #image.save(logdir+'samples_digit_'+str(digit)+'_'+tail, 'PNG')
                mosaic_x = (digit%mosaic_w)*submosaic_width
                mosaic_y = submosaic_offset+int(digit/mosaic_w)*submosaic_height
                mosaic.paste(image, (mosaic_x, mosaic_y))
            draw = ImageDraw.Draw(mosaic)
            draw.text((1,1),"Epoch #"+str(epoch)+" Loss="+str(int(ll)))
            #plt.savefig(logdir+'mosaic'+tail, format='PNG')
            mosaic.save(logdir+'mosaic'+tail, 'PNG')
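# --- Standalone sketch of the 2-D latent grid used for the manifold/mosaic
# plots above --- Each latent axis is split into n_width equal-probability bins
# and the bin centres are mapped through the inverse standard-normal CDF
# (scipy.stats.norm.ppf), so the grid covers the N(0, I) prior uniformly in
# probability mass. This mirrors the nested loop in the hook; the function name
# is ours, for illustration only.
import numpy as np
import scipy.stats

def latent_grid_2d(n_width=10):
    z = np.zeros((2, n_width ** 2))
    for i in range(n_width):
        for j in range(n_width):
            z[0, n_width * i + j] = scipy.stats.norm.ppf(float(i) / n_width + 0.5 / n_width)
            z[1, n_width * i + j] = scipy.stats.norm.ppf(float(j) / n_width + 0.5 / n_width)
    return z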
def main(n_passes, n_labeled, n_z, n_hidden, dataset, seed, alpha, n_minibatches, comment): ''' Learn a variational auto-encoder with generative model p(x,y,z)=p(y)p(z)p(x|y,z) And where 'x' is always observed and 'y' is _sometimes_ observed (hence semi-supervised). We're going to use q(y|x) as a classification model. ''' import time logdir = 'results/learn_yz_x_ss_' + dataset + '_' + str(n_z) + '-' + str( n_hidden) + '_nlabeled' + str(n_labeled) + '_alpha' + str( alpha) + '_seed' + str(seed) + '_' + comment + '-' + str( int(time.time())) + '/' if not os.path.exists(logdir): os.makedirs(logdir) print('logdir:', logdir) print(sys.argv[0], n_labeled, n_z, n_hidden, dataset, seed, comment) np.random.seed(seed) # Init data if dataset == 'mnist_2layer': size = 28 dim_input = (size, size) # Load model for feature extraction path = 'models/mnist_z_x_50-500-500_longrun/' #'models/mnist_z_x_50-600-600/' l1_v = ndict.loadz(path + 'v.ndict.tar.gz') l1_w = ndict.loadz(path + 'w.ndict.tar.gz') n_h = (500, 500) from anglepy.models.VAE_Z_X import VAE_Z_X l1_model = VAE_Z_X(n_x=28 * 28, n_hidden_q=n_h, n_z=50, n_hidden_p=n_h, nonlinear_q='softplus', nonlinear_p='softplus', type_px='bernoulli', type_qz='gaussianmarg', type_pz='gaussianmarg', prior_sd=1) # Load dataset import anglepy.data.mnist as mnist # load train and test sets train_x, train_y, valid_x, valid_y, test_x, test_y = mnist.load_numpy_split( size, binarize_y=True) # create labeled/unlabeled split in training set x_l, y_l, x_u, y_u = mnist.create_semisupervised( train_x, train_y, n_labeled) # Extract features # 1. Determine which dimensions to keep def transform(v, _x): return l1_model.dist_qz['z'](*([_x] + list(v.values()) + [np.ones((1, _x.shape[1]))])) q_mean, _ = transform(l1_v, x_u[0:1000]) idx_keep = np.std(q_mean, axis=1) > 0.1 # 2. Select dimensions for key in ['mean_b', 'mean_w', 'logvar_b', 'logvar_w']: l1_v[key] = l1_v[key][idx_keep, :] l1_w['w0'] = l1_w['w0'][:, idx_keep] # 3. Extract features x_mean_u, x_logvar_u = transform(l1_v, x_u) x_mean_l, x_logvar_l = transform(l1_v, x_l) x_unlabeled = {'mean': x_mean_u, 'logvar': x_logvar_u, 'y': y_u} x_labeled = {'mean': x_mean_l, 'logvar': x_logvar_l, 'y': y_l} valid_x, _ = transform(l1_v, valid_x) test_x, _ = transform(l1_v, test_x) n_x = np.sum(idx_keep) n_y = 10 type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' colorImg = False if dataset == 'svhn_2layer': size = 32 dim_input = (size, size) # Load model for feature extraction path = 'models/tmp/svhn_z_x_300-500-500/' l1_v = ndict.loadz(path + 'v.ndict.tar.gz') l1_w = ndict.loadz(path + 'w.ndict.tar.gz') f_enc, f_dec = pp.PCA_fromfile(path + 'pca_params.ndict.tar.gz', True) from anglepy.models.VAE_Z_X import VAE_Z_X n_x = l1_v['w0'].shape[1] #=600 l1_model = VAE_Z_X(n_x=n_x, n_hidden_q=(600, 600), n_z=300, n_hidden_p=(600, 600), nonlinear_q='softplus', nonlinear_p='softplus', type_px='gaussian', type_qz='gaussianmarg', type_pz='gaussianmarg', prior_sd=1) # SVHN dataset import anglepy.data.svhn as svhn size = 32 train_x, train_y, valid_x, valid_y, test_x, test_y = svhn.load_numpy_split( False, binarize_y=True, extra=False) #norb.load_resized(size, binarize_y=True) #train_x = np.hstack((_train_x, extra_x)) #train_y = np.hstack((_train_y, extra_y))[:,:604000] # create labeled/unlabeled split in training set import anglepy.data.mnist as mnist x_l, y_l, x_u, y_u = mnist.create_semisupervised( train_x, train_y, n_labeled) # Extract features # 1. 
Determine which dimensions to keep def transform(v, _x): return l1_model.dist_qz['z'](*([f_enc(_x)] + list(v.values()) + [np.ones((1, _x.shape[1]))])) # 2. We're keeping all latent dimensions # 3. Extract features x_mean_u, x_logvar_u = transform(l1_v, x_u) x_mean_l, x_logvar_l = transform(l1_v, x_l) x_unlabeled = {'mean': x_mean_u, 'logvar': x_logvar_u, 'y': y_u} x_labeled = {'mean': x_mean_l, 'logvar': x_logvar_l, 'y': y_l} valid_x, _ = transform(l1_v, valid_x) test_x, _ = transform(l1_v, test_x) n_x = l1_w['w0'].shape[1] n_y = 10 type_pz = 'gaussianmarg' type_px = 'gaussian' nonlinear = 'softplus' # Init VAE model p(x,y,z) from anglepy.models.VAE_YZ_X import VAE_YZ_X uniform_y = True model = VAE_YZ_X(n_x, n_y, n_hidden, n_z, n_hidden, nonlinear, nonlinear, type_px, type_qz="gaussianmarg", type_pz=type_pz, prior_sd=1, uniform_y=uniform_y) v, w = model.init_w(1e-3) # Init q(y|x) model from anglepy.models.MLP_Categorical import MLP_Categorical n_units = [n_x] + list(n_hidden) + [n_y] model_qy = MLP_Categorical(n_units=n_units, prior_sd=1, nonlinearity=nonlinear) u = model_qy.init_w(1e-3) # Just test if False: u = ndict.loadz('u.ndict.tar.gz') v = ndict.loadz('v.ndict.tar.gz') w = ndict.loadz('w.ndict.tar.gz') pass # Progress hook t0 = time.time() def hook(t, u, v, w, ll): # Get classification error of validation and test sets def error(dataset_x, dataset_y): _, _, _z = model_qy.gen_xz(u, {'x': dataset_x}, {}) return np.sum( np.argmax(_z['py'], axis=0) != np.argmax(dataset_y, axis=0) ) / (0.0 + dataset_y.shape[1]) valid_error = error(valid_x, valid_y) test_error = error(test_x, test_y) # Log ndict.savez(u, logdir + 'u') ndict.savez(v, logdir + 'v') ndict.savez(w, logdir + 'w') dt = time.time() - t0 print(dt, t, ll, valid_error, test_error) with open(logdir + 'hook.txt', 'a') as f: print(dt, t, ll, valid_error, test_error, file=f) return valid_error # Optimize result = optim_vae_ss_adam(alpha, model_qy, model, x_labeled, x_unlabeled, n_y, u, v, w, n_minibatches=n_minibatches, n_passes=n_passes, hook=hook) return result
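# --- Standalone sketch of the classification-error computation used in the
# hook above --- Both the predicted class probabilities 'py' and the one-hot
# labels are stored column-wise (one column per example), so the error rate is
# the fraction of columns whose argmax disagrees. The function name is ours,
# for illustration only.
import numpy as np

def classification_error(py, y_onehot):
    # py: (n_y, n_examples) class probabilities from q(y|x)
    # y_onehot: (n_y, n_examples) one-hot ground-truth labels
    wrong = np.argmax(py, axis=0) != np.argmax(y_onehot, axis=0)
    return float(np.sum(wrong)) / y_onehot.shape[1]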
import numpy as np  # needed below for np.ones
from anglepy import ndict
import scipy.io as sio
import cPickle, gzip
import math
import os, sys

# load data, recognition model and generative model
print "Loading data..."
f = gzip.open("data/mnist/mnist_28.pkl.gz", "rb")
(x_train, t_train), (x_valid, t_valid), (x_test, t_test) = cPickle.load(f)
f.close()
dir = sys.argv[1]
v = ndict.loadz(dir + "v_best.ndict.tar.gz")
w = ndict.loadz(dir + "w_best.ndict.tar.gz")

# choose number of images to transform and number of images to visualize
num_trans = 1000
num_show = 300
data = (x_test[:num_trans, :]).T
pertub_label = np.ones(data.shape)

# perturb data
print "Loading perturbed data..."
width = 28
height = 28
denoise_tpye = 1  # sample or mean
pertub_type = int(sys.argv[2])