def pt_grad(self, params, inpts, **kwargs):
    g = gzeros(params.shape)
    m, _ = inpts.shape

    hddn = logistic(gpu.dot(inpts, params[:self.m_end].reshape(self.shape))
                    + params[self.m_end:self.m_end + self.shape[1]])
    Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

    w = params[:self.m_end].reshape(self.shape)
    cae = gpu.sum(gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0))
    cae *= self.cae

    _, delta = self.score(Z, inpts, error=True, addon=cae)

    g[:self.m_end] = gdot(delta.T, hddn).ravel()
    g[-self.shape[0]:] = delta.sum(axis=0)

    cae_grad = gpu.mean(Dsigmoid(hddn)**2, axis=0) * w
    cae_grad += (gdot(inpts.T, (Dsigmoid(hddn)**2 * (1 - 2 * hddn))) / m
                 * gpu.sum(w**2, axis=0))
    g[:self.m_end] += self.cae * 2 * cae_grad.ravel()

    dsc_dha = Dsigmoid(hddn) * gdot(delta, params[:self.m_end].reshape(self.shape))

    g[:self.m_end] += gdot(inpts.T, dsc_dha).ravel()
    g[self.m_end:-self.shape[0]] = dsc_dha.sum(axis=0)

    # clean up
    del delta, hddn, Z
    return g
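# For reference, the cae term above is the squared Frobenius norm of the sigmoid
# encoder's Jacobian, summed over hidden units and averaged over the batch.
# A pure-NumPy sketch of that penalty on hypothetical small arrays (X, W, b below
# are illustrative only, not part of the class above):
import numpy as np

X = np.random.rand(5, 4)                        # batch of 5 inputs, 4 features
W = np.random.randn(4, 3)                       # encoder weights, 3 hidden units
b = np.zeros(3)

h = 1.0 / (1.0 + np.exp(-(X.dot(W) + b)))       # sigmoid hidden activations
dh = h * (1 - h)                                # sigmoid derivative
penalty = np.sum(np.mean(dh**2, axis=0) * np.sum(W**2, axis=0))
print(penalty)                                  # contractive penalty, batch-averaged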
def updateParams(self, scale, update, log=False):
    if log:
        for w, u in zip(self.stack, update):
            wrms = gp.sqrt(gp.mean(w[0]**2))
            urms = gp.sqrt(gp.mean((scale * u[0])**2))
            print "weight rms=%f -- update rms=%f" % (wrms, urms)

    self.stack = [[ws[0] + scale * wsDelta[0], ws[1] + scale * wsDelta[1]]
                  for ws, wsDelta in zip(self.stack, update)]
def get_drop_masks(self, mask_count, in_drop=0, hd_drop=0):
    """Get mask_count dropout masks shaped for each layer in self.layers.

    Dropout masks are computed based on drop rates self.drop_input and
    self.drop_hidden, and self.drop_undrop. Masks are scaled so that the
    sum of each mask for a given layer is the same. If in_drop == 1, we do
    dropping on the input layer and if hd_drop == 1, we also drop hiddens.
    """
    M = []
    # Generate an 'undrop' mask, which sets some masks to be dropless
    u_mask = (gp.rand(mask_count, 1) < self.drop_undrop)
    for i in range(self.layer_count):
        # Set drop_rate based on layer and in_drop/hd_drop
        drop_rate = 0.0
        if ((i == 0) and (in_drop == 1)):
            drop_rate = self.drop_input
        elif (hd_drop == 1):
            drop_rate = self.drop_hidden
        # Get mask dimension for this layer
        mask_dim = self.layers[i].dim_input
        # Generate random 'bit' mask
        d_mask = (gp.rand(mask_count, mask_dim) > drop_rate)
        # Compute bootleg 'or' with the undrop mask
        mask = ((d_mask + u_mask) > 0.1)
        # Rescale mask entries to have unit mean
        scales = 1.0 / gp.mean(mask, axis=1)
        scales = scales[:, gp.newaxis]
        mask = mask * scales
        # Record the generated mask
        M.append(mask)
    return M
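# The unit-mean rescaling above keeps the expected input scale constant across
# masks, and the 'undrop' rows skip dropout entirely. A minimal NumPy sketch of
# the same idea, with hypothetical rates and no class state assumed:
import numpy as np

def drop_masks(mask_count, mask_dim, drop_rate=0.5, undrop_rate=0.1):
    u_mask = np.random.rand(mask_count, 1) < undrop_rate       # rows that skip dropout
    d_mask = np.random.rand(mask_count, mask_dim) > drop_rate  # per-unit keep bits
    mask = np.logical_or(d_mask, u_mask).astype(float)
    mask /= mask.mean(axis=1, keepdims=True)                   # rescale to unit mean
    return mask

m = drop_masks(4, 10)
print(m.mean(axis=1))  # approximately 1.0 for every row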
def cov(x):
    y = gpu.mean(x, axis=1)[:, None]
    x = x.as_numpy_array() - y.as_numpy_array()
    x_T = x.T.conj()
    result = gpu.dot(x, x_T)
    result = result / (x.shape[1] - 1)
    return result
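# As a sanity check, this is the row covariance with the unbiased 1/(n-1)
# normalisation, which np.cov computes by default. A NumPy-only sketch with
# hypothetical data:
import numpy as np

x = np.random.randn(5, 100)             # 5 variables, 100 observations
xc = x - x.mean(axis=1, keepdims=True)  # centre each row
manual = xc.dot(xc.T.conj()) / (x.shape[1] - 1)
print(np.allclose(manual, np.cov(x)))   # True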
def constrain_weights(self):
    for i, rms_limit in enumerate(self.rms_limits):
        if not rms_limit:
            continue

        W = self.weights[i]
        rms_scale = rms_limit / gnp.sqrt(gnp.mean(W * W, axis=0))
        limit_rms = W * (1 + (rms_scale < 1) * (rms_scale - 1))
        self.weights[i] = limit_rms
def reconstruction_cross_entropy(self, vis):
    """Returns the cross entropy between vis and its reconstruction
    obtained by one step of Gibbs sampling."""
    _, sampled_p_vis = self.gibbs_sample(vis, 1)
    # binary cross entropy: -(v*log(p) + (1-v)*log(1-p)), averaged over visible units
    cross_entropy = -gp.mean(vis * gp.log(sampled_p_vis) +
                             (1 - vis) * gp.log(1 - sampled_p_vis),
                             axis=1)
    return cross_entropy
def limitColumnRMS(W, rmsLim):
    """
    All columns of W with rms entry above the limit are scaled to equal the limit.
    The limit can either be a row vector or a scalar.
    Apply to 2-d array W.
    """
    columnRMS = lambda W: gnp.sqrt(gnp.mean(W * W, axis=0))
    rmsScale = rmsLim / columnRMS(W)
    return W * (1 + (rmsScale < 1) * (rmsScale - 1))
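# The (rmsScale < 1) * (rmsScale - 1) trick leaves columns under the limit
# untouched and rescales the rest to sit exactly on the limit. A pure-NumPy
# sketch of the same operation on hypothetical data:
import numpy as np

def limit_column_rms(W, rms_lim):
    # Scale any column whose RMS exceeds rms_lim down to exactly rms_lim.
    col_rms = np.sqrt(np.mean(W * W, axis=0))
    scale = rms_lim / col_rms
    return W * (1 + (scale < 1) * (scale - 1))

W = np.random.randn(100, 5) * np.array([0.1, 0.5, 1.0, 2.0, 5.0])
W_limited = limit_column_rms(W, 1.0)
print(np.sqrt(np.mean(W_limited**2, axis=0)))  # no column RMS above 1.0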
def pt_score(self, params, inpts, **kwargs):
    hddn = logistic(gpu.dot(inpts, params[:self.m_end].reshape(self.shape))
                    + params[self.m_end:self.m_end + self.shape[1]])
    Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

    w = params[:self.m_end].reshape(self.shape)
    cae = gpu.sum(gpu.mean(Dsigmoid(hddn)**2, axis=0) * gpu.sum(w**2, axis=0))
    cae *= self.cae

    sc = self.score(Z, inpts, addon=cae)
    return np.array([sc, cae])
def pt_score(self, params, inpts, **kwargs):
    hddn = logistic(gpu.dot(inpts, params[:self.m_end].reshape(self.shape))
                    + params[self.m_end:self.m_end + self.shape[1]])
    Z = gdot(hddn, params[:self.m_end].reshape(self.shape).T) + params[-self.shape[0]:]

    if self.rho_hat is None:
        self.rho_hat = hddn.mean(axis=0)
    else:
        # exponential moving average of the mean hidden activation
        self.rho_hat *= 0.9
        self.rho_hat += 0.1 * hddn.mean(axis=0)
    sparsity = self.beta * gpu.sum(bKL(self.rho, self.rho_hat))

    sc = self.score(Z, inpts, addon=sparsity)
    return np.array([sc, sc - sparsity, sparsity, gpu.mean(self.rho_hat)])
def train(self):
    self.time_interval = 0
    t1 = time.time()
    cd = 1
    for current_epochs, weight_size in zip(self.epochs, self.weights_to_do):
        self.initialize_weights(weight_size)
        for epoch in xrange(current_epochs):
            error = 0
            for start_idx in range(0, self.X.shape[0], self.batch_size):
                self.w_updt = gpu.zeros((self.input, weight_size))
                self.bias_h_updt = gpu.zeros((1, weight_size))
                self.bias_v_updt = gpu.zeros((1, self.input))
                self.allocate_batch(start_idx)
                self.input_original = self.get_visible_vector(self.batch)
                self.input_dropped = self.input_original
                self.positive_phase()
                self.gibbs_updates(weight_size)
                for j in range(cd):
                    self.negative_phase()
                self.w += self.alpha * self.w_updt / float(self.current_batch_size)
                self.bias_h += self.alpha * self.bias_h_updt / float(self.current_batch_size)
                self.bias_v += self.alpha * self.bias_v_updt / float(self.current_batch_size)

                t0 = time.time()
                error += gpu.mean((self.input_dropped - self.input_original)**2)
                self.time_interval += time.time() - t0

            s = 'EPOCH: ' + str(epoch + 1)
            self.log_message(s)
            s = 'Reconstruction error: ' + str(error / (self.X.shape[0] / float(self.batch_size)))
            self.log_message(s)

        self.trained_weights.append([self.w.as_numpy_array(), self.bias_h.as_numpy_array()])
        self.input = self.w.shape[1]

    print 'Time interval: ' + str(self.time_interval)
    print 'Training time: ' + str(time.time() - t1)
    self.free_GPU_memory()
    return self.trained_weights
def xe(z, targets, predict=False, error=False, addon=0):
    """
    Cross entropy error.
    """
    if predict:
        return gpu.argmax(z, axis=1)

    _xe = z - logsumexp(z, axis=1)
    n, _ = _xe.shape
    xe = -gpu.mean(_xe[np.arange(n), targets])
    if error:
        err = gpu.exp(_xe)
        err[np.arange(n), targets] -= 1
        return xe + addon, err / n
    else:
        return xe + addon
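# The log-softmax above is computed as z - logsumexp(z) for numerical stability,
# and the error term exp(log-softmax) minus one-hot targets is the usual softmax
# gradient. A NumPy sketch of the same computation on a hypothetical toy batch:
import numpy as np

z = np.random.randn(4, 3)                  # logits for 4 examples, 3 classes
targets = np.array([0, 2, 1, 2])
n = z.shape[0]

# numerically stable log-softmax: z - logsumexp(z, axis=1)
zmax = z.max(axis=1, keepdims=True)
log_softmax = z - (zmax + np.log(np.exp(z - zmax).sum(axis=1, keepdims=True)))

loss = -np.mean(log_softmax[np.arange(n), targets])
grad = np.exp(log_softmax)                 # softmax probabilities
grad[np.arange(n), targets] -= 1           # subtract the one-hot targets
grad /= n                                  # average over the batch
print(loss)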
def train(self):
    epochs = 50
    batches = self.data.shape[0]
    alpha = 0.1
    self.time_interval = 0
    t1 = time.time()
    cd = 1
    cd3 = 10
    cd10 = 15
    for epoch in xrange(epochs):
        error = 0
        for i in xrange(batches):
            self.w_updt = gpu.zeros((784, 800))
            self.bias_h_updt = gpu.zeros((1, 800))
            self.bias_v_updt = gpu.zeros((1, 784))
            for j in range(cd):
                self.v_original = gpu.garray(self.data[i])
                self.v = self.v_original
                self.positive_phase()
                self.gibbs_updates()
                self.negative_phase()
            self.w += alpha * self.w_updt / 100.
            self.bias_h += alpha * self.bias_h_updt / 100.
            self.bias_v += alpha * self.bias_v_updt / 100.

            t0 = time.time()
            error += gpu.mean((self.v - self.v_original)**2)
            self.time_interval += time.time() - t0

        print 'EPOCH: ' + str(epoch + 1)
        print 'Reconstruction error: ' + str(error / batches)
        if epoch == cd10:
            cd = 10
        elif epoch == cd3:
            cd = 3

    print 'Time interval: ' + str(self.time_interval)
    print 'Training time: ' + str(time.time() - t1)
    np.save('/home/tim/development/RBM_w1.npy', self.w.as_numpy_array())
def columnRMS(W):
    return gnp.sqrt(gnp.mean(W * W, axis=0))
# train rbm
print "Training ml.rbm..."
rbm = mnist_rbm.train_rbm(seed=seed, plot_samples=False)

# estimate PF using AIS
print "Estimating partition function using %d AIS runs with %d intermediate " \
    "RBMs and %d Gibbs steps..." % (ais_runs, len(ais_betas), ais_gibbs_steps)
ais = AnnealedImportanceSampler(rbm, ais_base_samples, ais_base_chains,
                                ais_base_gibbs_steps_between_samples)
lpf, lpf_m_3s, lpf_p_3s = ais.log_partition_function(ais_betas, ais_runs,
                                                     ais_gibbs_steps)
rbm.log_pf = lpf

# calculate log probability of training and test set
tr_lp = gp.mean(rbm.normalized_log_p_vis(mnist_rbm.X))
tst_lp = gp.mean(rbm.normalized_log_p_vis(mnist_rbm.TX))
print "Average log p(x from training set) = %f" % tr_lp
print "Average log p(x from test set) = %f" % tst_lp

# accumulate statistics
tr_lps.append(tr_lp)
tst_lps.append(tst_lp)

# save statistics
rbmutil.leave_rbm_plot_directory()
if cfg.use_pcd:
    pcd_str = "p"
else:
    pcd_str = ""
np.savez_compressed("mnist-rbm-%03d-%scd%02d-performance.npz" %
def mean(A, axis):
    return gp.mean(A, axis=axis)
"prob.txt", clean=False) # Build RBM rbm = RestrictedBoltzmannMachine(0, cfg.n_vis, cfg.n_hid, 0) # load Ruslan's RBM if use_ruslan: print "Loading Ruslan's ml.rbm..." mdata = scipy.io.loadmat("matlab_epoch%d.mat" % (epoch + 1)) ml.rbm.bias_vis = gp.as_garray(mdata['visbiases'][0,:]) ml.rbm.bias_hid = gp.as_garray(mdata['hidbiases'][0,:]) ml.rbm.weights = gp.as_garray(mdata['vishid']) else: rbmutil.load_parameters(rbm, "weights-%02i.npz" % epoch) # load pratition function if use_ruslan: filename = "matlab-lpf-%02d.npz" % (epoch+1) else: filename = "lpf-%02d.npz" % epoch print "Loading partition function %s" % filename lpf = np.load(filename) rbm.log_pf = lpf['lpf'] # calculate log probability of training set tr_lp = gp.mean(rbm.normalized_log_p_vis(X)) tst_lp = gp.mean(rbm.normalized_log_p_vis(TX)) print "Average log p(x from training set) = %f" % tr_lp print "Average log p(x from test set) = %f" % tst_lp
def init_using_dataset(self, vis_samples):
    "Calculates the biases of the base rate RBM using the given samples"
    epsilon = 1e-2
    vis_mean = gp.mean(vis_samples, axis=0)
    self.base_bias_vis = gp.log((vis_mean + epsilon) / (1 - vis_mean + epsilon))
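# Setting the visible bias to the (smoothed) logit of the data mean makes the
# base-rate RBM reproduce the empirical unit means, since sigmoid(log(m/(1-m))) = m.
# A quick NumPy check on hypothetical binary data:
import numpy as np

vis_samples = (np.random.rand(1000, 8) < 0.3).astype(float)
eps = 1e-2
m = vis_samples.mean(axis=0)
bias = np.log((m + eps) / (1 - m + eps))   # smoothed logit
recovered = 1.0 / (1.0 + np.exp(-bias))    # sigmoid of the bias
print(np.abs(recovered - m).max())         # small, up to the epsilon smoothing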
def JointBayesian_Train(trainingset, label, fold="./"):
    if fold[-1] != '/':
        fold += '/'
    print trainingset.shape
    print trainingset[0]
    # the total number of images
    n_image = len(label)
    # the dimensionality of the features
    n_dim = trainingset.shape[1]
    # collapse duplicate labels to count the total number of people
    classes, labels = np.unique(label, return_inverse=True)
    # the total number of people
    n_class = len(classes)
    # print classes
    # print labels
    # save the items of each person
    cur = {}
    withinCount = 0
    # record which per-person image counts occur
    numberBuff = np.zeros(n_image, np.float32)
    maxNumberInOneClass = 0
    for i in range(n_class):
        # get the items of person i
        cur[i] = trainingset[labels == i]
        # cur_gpu = shared(cur[i])
        # the number of images with the same label
        n_same_label = cur[i].shape[0]
        if n_same_label > 1:
            withinCount += n_same_label
        if numberBuff[n_same_label] == 0:
            numberBuff[n_same_label] = 1
            maxNumberInOneClass = max(maxNumberInOneClass, n_same_label)
    utils.print_info("prepare done, maxNumberInOneClass=" + str(maxNumberInOneClass))

    u = np.zeros([n_dim, n_class], np.float32)
    u_gpu = gpu.garray(u)
    ep = np.zeros([n_dim, withinCount], np.float32)
    ep_gpu = gpu.garray(ep)
    nowp = 0
    for i in range(n_class):
        # the mean of cur[i]
        cur_gpu = gpu.garray(cur[i])
        u_gpu[:, i] = gpu.mean(cur_gpu, 0)
        b = u_gpu[:, i].reshape(n_dim, 1)
        n_same_label = cur[i].shape[0]
        if n_same_label > 1:
            ep_gpu[:, nowp:nowp + n_same_label] = cur_gpu.T - b
            nowp += n_same_label
    utils.print_info("stage1 done")

    Su = cov(u_gpu)
    gpu.status()
    Sw = cov(ep_gpu)
    oldSw = Sw
    SuFG = {}
    SwG = {}
    convergence = 1
    min_convergence = 1
    for l in range(500):
        F = np.linalg.pinv(Sw.as_numpy_array())
        F_gpu = gpu.garray(F)
        u = np.zeros([n_dim, n_class], np.float32)
        u_gpu = gpu.garray(u)
        ep = np.zeros([n_dim, n_image], np.float32)
        ep_gpu = gpu.garray(ep)
        nowp = 0
        for mi in range(maxNumberInOneClass + 1):
            if numberBuff[mi] == 1:
                # G = -(mi*Su + Sw)^-1 * Su * Sw^-1
                temp = np.linalg.pinv(mi * Su.as_numpy_array() + Sw.as_numpy_array())
                temp2 = gpu.dot(gpu.garray(temp), Su)
                G = -gpu.dot(temp2, F_gpu)
                # Su*(F + mi*G) for u
                SuFG[mi] = gpu.dot(Su, (F_gpu + mi * G))
                # Sw*G for e
                SwG[mi] = gpu.dot(Sw, G)
        utils.print_info('stage2 done')
        # print SuFG
        for i in range(n_class):
            # print l, i
            nn_class = cur[i].shape[0]
            # print nn_class
            cur_gpu = gpu.garray(cur[i])
            # formula 7 in suppl_760
            temp = gpu.dot(SuFG[nn_class], cur_gpu.T)
            u_gpu[:, i] = gpu.sum(temp, 1)
            # formula 8 in suppl_760
            ep_gpu[:, nowp:nowp + nn_class] = cur_gpu.T + \
                gpu.sum(gpu.dot(SwG[nn_class], cur_gpu.T), 1).reshape(n_dim, 1)
            nowp = nowp + nn_class
        print 'stage2 done'

        Su = cov(u_gpu)
        Sw = cov(ep_gpu)
        convergence = np.linalg.norm((Sw - oldSw).as_numpy_array()) / \
            np.linalg.norm(Sw.as_numpy_array())
        utils.print_info("Iterations-" + str(l) + ": " + str(convergence))
        if convergence < 1e-6:
            print "Convergence: ", l, convergence
            break
        oldSw = Sw

        if convergence < min_convergence:
            min_convergence = convergence
            F = np.linalg.pinv(Sw.as_numpy_array())
            F_gpu = gpu.garray(F)
            G = -gpu.dot(gpu.dot(np.linalg.pinv((2 * Su + Sw).as_numpy_array()),
                                 Su.as_numpy_array()), F_gpu)
            A = np.linalg.pinv((Su + Sw).as_numpy_array()) - (F + G.as_numpy_array())
            utils.data_to_pkl(G, fold + "G.pkl")
            utils.data_to_pkl(A, fold + "A.pkl")

    F = np.linalg.pinv(Sw.as_numpy_array())
    F_gpu = gpu.garray(F)
    temp = gpu.garray(np.linalg.pinv((2 * Su + Sw).as_numpy_array()))
    G = -gpu.dot(gpu.dot(temp, Su), F_gpu).as_numpy_array()
    A = np.linalg.pinv((Su + Sw).as_numpy_array()) - (F + G)
    utils.data_to_pkl(G, fold + "G_con.pkl")
    utils.data_to_pkl(A, fold + "A_con.pkl")

    return A, G
def test_compare_rbm_with_ruslan():
    """Trains own RBM and compares average likelihood on training and test set
    with RBM trained by Ruslan"""
    ref_file = "test/rbm-for-ais-test.mat"
    iterations = 10
    alpha = 0.10

    mdata = scipy.io.loadmat(ref_file)
    ref_logpf = mdata['logZZ_est'][0, 0]
    ref_logpf_low = mdata['logZZ_est_down'][0, 0]
    ref_logpf_high = mdata['logZZ_est_up'][0, 0]
    ref_ll_training = mdata['loglik_training_est'][0, 0]
    ref_ll_test = mdata['loglik_test_est'][0, 0]
    n_hid = int(mdata['numhid'][0, 0])
    cd = int(mdata['CD'][0, 0])
    epochs = int(mdata['maxepoch'][0, 0])

    os.chdir("test-tmp")

    ll_trainings = []
    ll_tests = []

    print "Running %d iterations" % iterations
    for i in range(iterations):
        tcfg = rbm.config.TrainingConfiguration(
            dataset='rmnist', n_vis=784, n_hid=n_hid,
            batch_size=100, n_gibbs_steps=cd, epochs=epochs, step_rate=0.05,
            use_pcd=False, binarize_data=True,
            initial_momentum=0.5, final_momentum=0.9,
            use_final_momentum_from_epoch=5,
            weight_cost=0.0002, init_weight_sigma=0.01, init_bias_sigma=0,
            seed=random.randint(0, 100000))
        myrbm = rbm.rbm.train_rbm(tcfg)

        ais = rbm.ais.AnnealedImportanceSampler(myrbm)
        ais.init_using_dataset(tcfg.X)
        betas = np.concatenate((np.linspace(0.0, 0.5, 500, endpoint=False),
                                np.linspace(0.5, 0.9, 4000, endpoint=False),
                                np.linspace(0.9, 1.0, 10000)))
        logpf, logpf_low, logpf_high = ais.log_partition_function(betas=betas,
                                                                  ais_runs=100)
        myrbm.log_pf = logpf

        print "Test: log Z = %g (%g, %g)" % (logpf, logpf_low, logpf_high)
        print "Reference: log Z = %g (%g, %g)" % (ref_logpf, ref_logpf_low, ref_logpf_high)

        ll_training = gp.mean(myrbm.normalized_log_p_vis(tcfg.X))
        ll_test = gp.mean(myrbm.normalized_log_p_vis(tcfg.TX))

        print "Test: Average log p(x from training set) = %f" % ll_training
        print "Test: Average log p(x from test set) = %f" % ll_test

        ll_trainings.append(ll_training)
        ll_tests.append(ll_test)

    ll_training_mean, ll_training_pm = common.stats.normal_mean(ll_trainings, alpha)
    ll_test_mean, ll_test_pm = common.stats.normal_mean(ll_tests, alpha)

    print
    print "Reference: Average log p(x from training set) = %f" % ref_ll_training
    print "Test: <Average log p(x from training set)> = %f +/- %f" % \
        (ll_training_mean, ll_training_pm)
    print "Reference: Average log p(x from test set) = %f" % ref_ll_test
    print "Test: <Average log p(x from test set)> = %f +/- %f" % \
        (ll_test_mean, ll_test_pm)

    assert common.util.interval_contains(common.stats.normal_mean_confint(ll_trainings, alpha),
                                         ref_ll_training) or ll_training_mean > ref_ll_training
    assert common.util.interval_contains(common.stats.normal_mean_confint(ll_tests, alpha),
                                         ref_ll_test) or ll_test_mean > ref_ll_test
def test_compare_rbm_with_lisa():
    """Trains own RBM and compares average likelihood on training and test set
    with RBM trained by the Deep Learning Tutorials"""
    ref_file = "test/lisa-rbm.npz"
    iterations = 10
    #iterations = 2
    alpha = 0.10
    common.show_progress = True

    n_vis = 784
    n_hid = 500
    init_weight_sigma = 4 * np.sqrt(6. / (n_hid + n_vis))

    refrbm = rbm.rbm.RestrictedBoltzmannMachine(20, n_vis, n_hid, 0)
    rbm.util.load_parameters(refrbm, ref_file)

    os.chdir("test-tmp")

    lls = []
    ref_lls = []

    print "Running %d iterations" % iterations
    for i in range(iterations):
        tcfg = rbm.config.TrainingConfiguration(
            dataset='mnistv', n_vis=n_vis, n_hid=n_hid,
            batch_size=20, n_gibbs_steps=15, epochs=15, step_rate=0.1,
            use_pcd=True, binarize_data=False,
            initial_momentum=0, final_momentum=0, use_final_momentum_from_epoch=0,
            weight_cost=0, init_method='uniform',
            init_weight_sigma=init_weight_sigma, init_bias_sigma=0,
            seed=random.randint(0, 100000))
        rbm.util.enter_rbm_plot_directory(tcfg)
        myrbm = rbm.rbm.train_rbm(tcfg)

        # AIS on my RBM
        ais = rbm.ais.AnnealedImportanceSampler(myrbm)
        ais.init_using_dataset(tcfg.X)
        betas = np.concatenate((np.linspace(0.0, 0.5, 500, endpoint=False),
                                np.linspace(0.5, 0.9, 4000, endpoint=False),
                                np.linspace(0.9, 1.0, 10000)))
        logpf, logpf_low, logpf_high = ais.log_partition_function(betas=betas,
                                                                  ais_runs=100)
        myrbm.log_pf = logpf
        print "Test: log Z = %g (%g, %g)" % (logpf, logpf_low, logpf_high)

        # log likelihood of my RBM
        ll = gp.mean(myrbm.normalized_log_p_vis(tcfg.X))
        lls.append(ll)
        print "Test: Average log p(x from training set) = %f" % ll

        # AIS on reference RBM
        refais = rbm.ais.AnnealedImportanceSampler(refrbm)
        refais.init_using_dataset(tcfg.X)
        betas = np.concatenate((np.linspace(0.0, 0.5, 500, endpoint=False),
                                np.linspace(0.5, 0.9, 4000, endpoint=False),
                                np.linspace(0.9, 1.0, 10000)))
        logpf, logpf_low, logpf_high = refais.log_partition_function(betas=betas,
                                                                     ais_runs=100)
        refrbm.log_pf = logpf
        print "Reference: log Z = %g (%g, %g)" % (logpf, logpf_low, logpf_high)

        # log likelihood of the reference RBM
        ref_ll = gp.mean(refrbm.normalized_log_p_vis(tcfg.X))
        ref_lls.append(ref_ll)
        print "Reference: Average log p(x from training set) = %f" % ref_ll

        # print running statistics
        ll_mean, ll_pm = common.stats.normal_mean(lls, alpha)
        ref_ll_mean, ref_ll_pm = common.stats.normal_mean(ref_lls, alpha)
        print "############################################################"
        print "After %d iterations:" % i
        print "Test: <Average log p(x from training set)> = %g +/- %g" % \
            (ll_mean, ll_pm)
        print "Reference: <Average log p(x from training set)> = %g +/- %g" % \
            (ref_ll_mean, ref_ll_pm)

    assert common.util.interval_contains(common.stats.normal_mean_confint(lls, alpha),
                                         ref_ll_mean) or ll_mean > ref_ll_mean
def reject_outliers(data, m=3):
    data = np.array(data)
    outlier_idx = np.where(abs(data - gpu.mean(data)) >= m * np.std(data))
    data[outlier_idx] = np.inf
    return data
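# The function above flags points more than m standard deviations from the mean
# by overwriting them with np.inf rather than removing them, which keeps indices
# aligned. A NumPy-only sketch of the same idea with hypothetical data:
import numpy as np

def reject_outliers_np(data, m=3):
    data = np.asarray(data, dtype=float).copy()
    mask = np.abs(data - data.mean()) >= m * data.std()
    data[mask] = np.inf   # flag outliers instead of dropping them
    return data

print(reject_outliers_np([1.0, 1.1, 0.9, 1.05, 50.0], m=1))
# the 50.0 entry is replaced by inf, the rest are unchanged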
def rbm_train(dataset, H, batch_size, epoch_count, epsilon, momentum,
              return_hidden=True, verbose=True):
    """
    Train a (binary) restricted boltzmann machine.

    dataset:       Input data. DataSet instance or matrix of size
                   N (number of data points) x D (input dimension)
    H:             Number of hidden units
    batch_size:    Number of data points in each batch
    epoch_count:   Number of training epochs
    epsilon:       Learning rate, either a scalar or an array (one value for each epoch)
    momentum:      Momentum parameter, either a scalar or an array (one value for each epoch)
    return_hidden: If True, returns hidden unit activations for training data.
    verbose:       If True, prints progress information

    Returns w_vh (weights between visible-hidden units), w_v (visible unit biases),
    w_h (hidden unit biases), h (hidden unit activations for input data),
    error (reconstruction error at each epoch)
    """
    if isinstance(dataset, ds.DataSet):
        train_x = dataset.train.x
        N = dataset.train.N
        D = dataset.train.D
    else:
        train_x = dataset
        N = train_x.shape[0]
        D = train_x.shape[1]

    batch_count = int(np.ceil(N / float(batch_size)))

    # if momentum is a scalar, create a list with the same value for all epochs
    if not isinstance(momentum, list):
        momentum = [momentum] * epoch_count
    if not isinstance(epsilon, list):
        epsilon = [epsilon] * epoch_count

    # initialize weights
    w_vh = gnp.randn((D, H)) * 0.1
    w_v = gnp.zeros((D, 1))
    w_h = gnp.zeros((H, 1))

    # weight updates
    dw_vh = gnp.zeros((D, H))
    dw_v = gnp.zeros((D, 1))
    dw_h = gnp.zeros((H, 1))

    # hidden unit activations
    if return_hidden:
        h = np.zeros((N, H))  # keep this a numpy array to save memory
    else:
        h = []

    start_time = time.time()
    # reconstruction errors over epochs
    error = []

    batch_order = range(batch_count)

    for e in range(epoch_count):
        if verbose:
            print("Epoch " + repr(e + 1))

        batch_error = []
        processed_batch = 0
        for b in range(batch_count):
            processed_batch += 1
            if verbose:
                print("\r%d/%d" % (processed_batch, batch_count)),

            start = b * batch_size
            end = (b + 1) * batch_size if (b + 1) * batch_size < N else N
            x = train_x[start:end, :].T

            # apply momentum
            dw_vh *= momentum[e]
            dw_v *= momentum[e]
            dw_h *= momentum[e]

            # positive phase
            ahp = gnp.dot(w_vh.T, x) + w_h
            hp = gnp.logistic(ahp)

            # if it is the last epoch, store hidden unit activations
            if return_hidden and e == epoch_count - 1:
                h[start:end, :] = gnp.as_numpy_array(hp.T)

            # add positive gradient term
            dw_vh += gnp.dot(x, hp.T)
            dw_v += gnp.sum(x, axis=1)[:, gnp.newaxis]
            dw_h += gnp.sum(hp, axis=1)[:, gnp.newaxis]

            # sample hiddens
            hs = hp > gnp.rand(hp.shape[0], hp.shape[1])

            # negative phase
            avn = gnp.dot(w_vh, hs) + w_v
            vn = gnp.logistic(avn)
            ahn = gnp.dot(w_vh.T, vn) + w_h
            hn = gnp.logistic(ahn)

            dw_vh -= gnp.dot(vn, hn.T)
            dw_v -= gnp.sum(vn, axis=1)[:, gnp.newaxis]
            dw_h -= gnp.sum(hn, axis=1)[:, gnp.newaxis]

            # update weights
            w_vh += epsilon[e] / (end - start) * dw_vh
            w_v += epsilon[e] / (end - start) * dw_v
            w_h += epsilon[e] / (end - start) * dw_h

            batch_error.append(gnp.mean((vn - x) ** 2))

        # shuffle batch order
        np.random.shuffle(batch_order)

        error.append(np.mean(batch_error))
        if verbose:
            print("\nReconstruction error: " + repr(error[-1]))
            print("Elapsed time: " + str(time.time() - start_time))

    return w_vh, w_v, w_h, h, error
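# A minimal usage sketch for the function above, assuming gnumpy and the module's
# other imports are available; the training data here is hypothetical random
# binary vectors rather than a real dataset:
import numpy as np

data = (np.random.rand(1000, 20) > 0.5).astype(float)   # 1000 binary vectors, dim 20
w_vh, w_v, w_h, h, err = rbm_train(data, H=10, batch_size=100, epoch_count=5,
                                   epsilon=0.1, momentum=0.9, verbose=False)
print(err)  # per-epoch reconstruction error, expected to decrease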
def train(self, fulldata, num_epochs, eta=0.01, hidden=None, sample=False,
          early_stop=True, verbose=True):
    '''
    Method to learn the weights of the RBM.

    args:
        array fulldata:  the training data
        int num_epochs:  the number of times to run through the training data
        float eta:       the learning rate, default 0.01
        array hidden:    optional array specifying the hidden representation to
                         learn (for use in a translational-RBM)
        bool sample:     specifies whether training should use sampling, default False
        bool early_stop: whether to use early stopping, default True
    '''
    if len(fulldata) == 0:
        return
    if type(fulldata) != self.np_array_type or type(fulldata[0]) != self.np_array_type:
        fulldata = np.array([np.array(r) for r in fulldata])

    if hidden is not None:
        # check that there is a hidden rep for each data row
        assert hidden.shape[0] == fulldata.shape[0]
        # check that we have the right number of hidden units
        assert hidden.shape[1] == self.n_hidden

    # these parameters control momentum changes
    initial_momentum = 0.5
    final_momentum = 0.9
    momentum_iter = 5

    # when dealing with large arrays, we have to break the data into
    # manageable chunks to avoid running out of memory
    num_rows = fulldata.shape[0]

    err_hist = []  # keep track of the errors for early stopping
    for epoch in range(num_epochs):
        if epoch <= momentum_iter:
            momentum = initial_momentum
        else:
            momentum = final_momentum

        mae = []
        if verbose:
            print "Training epoch %d of %d," % (epoch + 1, num_epochs),

        num_batches = num_rows / self.batch_size + 1
        xs = gp.garray(fulldata)
        if hidden is not None:
            hid_chunk = gp.garray(hidden)

        for batch in range(num_batches):
            # positive phase
            if num_batches == 1:
                v1 = xs
            else:
                v1 = xs[batch * self.batch_size:(batch + 1) * self.batch_size]
            if len(v1) == 0:
                continue

            if hidden is None:
                h1 = self.prop_up(v1)
            else:
                if num_batches == 1:
                    h1 = hid_chunk
                else:
                    h1 = hid_chunk[batch * self.batch_size:(batch + 1) * self.batch_size]

            # negative phase
            if sample:
                hSampled = h1.rand() < h1
                v2 = self.prop_down(hSampled)
            else:
                v2 = self.prop_down(h1)
            h2 = self.prop_up(v2)

            # update weights
            self.wu_vh = self.wu_vh * momentum + gp.dot(v1.T, h1) - gp.dot(v2.T, h2)
            self.wu_v = self.wu_v * momentum + v1.sum(0) - v2.sum(0)
            self.wu_h = self.wu_h * momentum + h1.sum(0) - h2.sum(0)

            self.W += self.wu_vh * (eta / self.batch_size)
            self.vbias += self.wu_v * (eta / self.batch_size)
            self.hbias += self.wu_h * (eta / self.batch_size)

            # calculate reconstruction error
            error = gp.abs(v2 - v1)
            #mae.append(error.euclid_norm()**2/(self.n_visible*self.batch_size))
            mae.append(gp.mean(error))

        err_hist.append(np.mean(mae))
        if verbose:
            print " mean absolute error: " + str(np.mean(mae))

        # early stopping
        if early_stop:
            recent_err = np.mean(err_hist[epoch - 50:epoch])
            early_err = np.mean(err_hist[epoch - 200:epoch - 150])
            if (epoch > 250) and ((recent_err * 1.2) > early_err):
                break