def train_rbm(tcfg, print_cost=False):
    """Train and return an RBM described by a training configuration.

    The model is trained with mini-batch contrastive divergence (CD) or
    persistent contrastive divergence (PCD), using momentum and an L2
    weight cost applied to the weights only.  After every epoch the
    parameters are saved and the weights (and, for PCD, the chains) are
    plotted.

    Args:
        tcfg: a RestrictedBoltzmannMachineTrainingConfiguration.  The
            attributes read here are ``seed``, ``batch_size``, ``n_vis``,
            ``n_hid``, ``n_gibbs_steps``, ``init_weight_sigma``,
            ``init_bias_sigma``, ``epochs``, ``X``, ``binarize_data``,
            ``use_pcd``, ``use_final_momentum_from_epoch``,
            ``initial_momentum``, ``final_momentum``, ``step_rate`` and
            ``weight_cost``.
        print_cost: if true, print the reconstruction cost and an estimate
            of the pseudo-likelihood after every epoch.

    Returns:
        The trained RestrictedBoltzmannMachine instance.
    """
    # seed RNGs
    gp.seed_rand(tcfg.seed)

    # Build RBM
    rbm = RestrictedBoltzmannMachine(tcfg.batch_size,
                                     tcfg.n_vis,
                                     tcfg.n_hid,
                                     tcfg.n_gibbs_steps,
                                     tcfg.init_weight_sigma,
                                     tcfg.init_bias_sigma)

    # initialize momentums
    weights_update = 0
    bias_vis_update = 0
    bias_hid_update = 0

    # train
    for epoch in range(tcfg.epochs):
        seen_epoch_samples = 0

        if print_cost:
            pl_bit = 0
            pl_sum = 0
            rc_sum = 0

        # The momentum only depends on the epoch, so choose it once per
        # epoch instead of once per batch.
        if epoch >= tcfg.use_final_momentum_from_epoch:
            momentum = tcfg.final_momentum
        else:
            momentum = tcfg.initial_momentum

        for x in draw_slices(tcfg.X, tcfg.batch_size, kind='sequential',
                             samples_are='rows', stop=True):
            # binarize x
            if tcfg.binarize_data:
                x = sample_binomial(x)

            # Compute the update direction on the model that is being
            # trained and returned.  (Previously these calls went through
            # `ml.rbm`, so the local `rbm` was never updated.)
            if tcfg.use_pcd:
                weights_step, bias_vis_step, bias_hid_step = \
                    rbm.pcd_update(x)
            else:
                weights_step, bias_vis_step, bias_hid_step = \
                    rbm.cd_update(x)

            # momentum update; the weight cost is applied to the weights
            # but not to the biases
            weights_update = momentum * weights_update + \
                tcfg.step_rate * (weights_step -
                                  tcfg.weight_cost * rbm.weights)
            bias_vis_update = momentum * bias_vis_update + \
                tcfg.step_rate * bias_vis_step
            bias_hid_update = momentum * bias_hid_update + \
                tcfg.step_rate * bias_hid_step

            rbm.weights += weights_update
            rbm.bias_vis += bias_vis_update
            rbm.bias_hid += bias_hid_update

            seen_epoch_samples += tcfg.batch_size

            if print_cost:
                # calculate part of pseudo-likelihood: only one bit is
                # evaluated per batch, cycling through all visible bits
                pl_sum += gp.sum(rbm.pseudo_likelihood_for_bit(x > 0.5,
                                                               pl_bit))
                pl_bit = (pl_bit + 1) % tcfg.X.shape[1]

                # calculate part of reconstruction cost
                rc_sum += gp.sum(rbm.reconstruction_cross_entropy(x > 0.5))

        #############################################
        # end of epoch

        # save parameters
        save_parameters(rbm, epoch)

        # plot weights and current state of PCD chains
        plot_weights(rbm, epoch)
        if tcfg.use_pcd:
            plot_pcd_chains(rbm, epoch)

        # Skip the report when no batch was drawn; the averages below
        # would otherwise divide by zero.
        if print_cost and seen_epoch_samples:
            # calculate pseudo likelihood and reconstruction cost; the
            # per-bit average is scaled by the number of visible bits
            pl = pl_sum / seen_epoch_samples * tcfg.X.shape[1]
            rc = rc_sum / seen_epoch_samples
            # single-argument print(...) prints the same text on
            # Python 2 and Python 3
            print("Epoch %02d: reconstruction cost=%f, pseudo likelihood=%f" %
                  (epoch, rc, pl))

    return rbm
rbmutil.enter_rbm_plot_directory("mnist", cfg.n_hid, cfg.use_pcd, cfg.n_gibbs_steps, clean=False) # Build RBM rbm = RestrictedBoltzmannMachine(0, cfg.n_vis, cfg.n_hid, 0) rbmutil.load_parameters(rbm, "weights-%02i.npz" % epoch) #rbmutil.load_parameters("../../../DeepLearningTutorials/code/rbm_plots/GPU-PCD/weights.npz") #epoch = 99 # calculate statistics seen_epoch_samples = 0 pl_bit = 0 pl_sum = 0 rc_sum = 0 for x in util.draw_slices(X, cfg.batch_size, kind='sequential', samples_are='rows', stop=True): print "%d / %d \r" % (seen_epoch_samples, X.shape[0]), seen_epoch_samples += cfg.batch_size # calculate part of pseudo-likelihood pl_sum += gp.sum(rbm.pseudo_likelihood_for_bit(x > 0.5, pl_bit)) pl_bit = (pl_bit + 1) % X.shape[1] # calculate part of reconstruction cost rc_sum += gp.sum(rbm.reconstruction_cross_entropy(x > 0.5)) ############################################# # end of batch: evaluate performance of model # plot weights rbmutil.plot_weights(rbm, epoch)