def _build_rnnrbm(self):
    """
    Assemble the symbolic training and generation graphs for the RNN-RBM.

    Returns
    -------
    tuple
        In order: the output of the training RBM, the training cost, the
        monitors dictionary (with an added ``'mse'`` entry), the merged
        training updates, the generated visible sequence, the generation
        updates, and the last recurrent state emitted by the generation scan.
    """
    rbm_outdir = os.path.join(self.outdir, 'rbm')

    # Training pass: run the deterministic recurrence over the input sequence
    # to obtain the per-timestep RBM biases. The recurrent states produced by
    # this scan are not used afterwards, so they are discarded here.
    (_, bv_ts, bh_ts), updates_train = theano.scan(
        fn=self.recurrence,
        sequences=self.input,
        outputs_info=[self.u0, None, None],
        name="rnnrbm_computation_scan",
    )

    # RBM trained in batch over all timesteps with the time-dependent biases.
    train_rbm = RBM(
        inputs_hook=(self.input_size, self.input),
        params_hook=(self.W, bv_ts[:], bh_ts[:]),
        visible_activation=self.visible_activation_func,
        hidden_activation=self.hidden_activation_func,
        k=self.k,
        outdir=rbm_outdir,
        mrg=self.mrg,
    )
    v_sample = train_rbm.get_outputs()
    cost = train_rbm.get_train_cost()
    monitors = train_rbm.get_monitors()
    train_rbm_updates = train_rbm.get_updates()

    # Second chain, shifted one step into the future, used only to measure
    # the frame-level error.
    # NOTE(review): unlike the training RBM, this one is built without the
    # visible/hidden activation arguments -- confirm that is intentional.
    predict_rbm = RBM(
        inputs_hook=(self.input_size, self.input[:-1]),
        params_hook=(self.W, bv_ts[1:], bh_ts[1:]),
        k=self.k,
        outdir=rbm_outdir,
        mrg=self.mrg,
    )
    v_prediction = predict_rbm.get_outputs()
    predict_rbm_updates = predict_rbm.get_updates()

    # Mean squared difference per frame, then averaged into a single scalar
    # that is exposed as the 'mse' monitor.
    squared_diff = T.sqr(v_sample[1:] - v_prediction)
    monitors['mse'] = T.mean(T.mean(squared_diff, axis=0))

    # Fold both RBM update sets into the training scan's updates.
    for extra_updates in (train_rbm_updates, predict_rbm_updates):
        updates_train.update(extra_updates)

    def _generate_step(u_tm1):
        # No visible input is supplied, so the recurrence samples one itself.
        return self.recurrence(None, u_tm1)

    # Symbolic loop for sequence generation.
    (generated_v, generated_u), updates_generate = theano.scan(
        _generate_step,
        outputs_info=[None, self.generate_u0],
        n_steps=self.n_steps,
        name="rnnrbm_generate_scan",
    )

    # NOTE(review): the final recurrent state returned here comes from the
    # generation scan, not from the training scan.
    return (v_sample, cost, monitors, updates_train,
            generated_v, updates_generate, generated_u[-1])
def _build_rnnrbm(self):
    """
    Build the computation graph pieces used for training and generation.

    Returns
    -------
    tuple
        Seven items, in order: the output of the training RBM, the training
        cost, the monitors dictionary (including an ``'mse'`` entry added
        here), the combined training updates, the generated visible
        sequence, the generation updates, and the final recurrent state of
        the generation scan.
    """
    rbm_outdir = os.path.join(self.outdir, 'rbm')

    def _train_step(v_t, u_tm1):
        # One deterministic recurrence step driven by the observed input.
        return self.recurrence(v_t, u_tm1)

    def _generate_step(u_tm1):
        # One recurrence step with no input: the visible frame is sampled.
        return self.recurrence(None, u_tm1)

    # Scan over the input to get {bv_t, bh_t} for every timestep; the
    # recurrent states from this scan are not read again below.
    (_, bv_ts, bh_ts), updates_train = theano.scan(
        fn=_train_step,
        sequences=self.input,
        outputs_info=[self.u0, None, None],
        name="rnnrbm_computation_scan",
    )

    # Conditional RBM over the full sequence, used for the cost and sample.
    train_rbm = RBM(
        inputs_hook=(self.input_size, self.input),
        params_hook=(self.W, bv_ts[:], bh_ts[:]),
        visible_activation=self.visible_activation_func,
        hidden_activation=self.hidden_activation_func,
        k=self.k,
        outdir=rbm_outdir,
        mrg=self.mrg,
    )
    v_sample = train_rbm.get_outputs()
    cost = train_rbm.get_train_cost()
    monitors = train_rbm.get_monitors()
    train_rbm_updates = train_rbm.get_updates()

    # One-step-ahead chain for the frame-level error monitor.
    # NOTE(review): no activation arguments are passed to this RBM, unlike
    # the one above -- verify this is deliberate.
    predict_rbm = RBM(
        inputs_hook=(self.input_size, self.input[:-1]),
        params_hook=(self.W, bv_ts[1:], bh_ts[1:]),
        k=self.k,
        outdir=rbm_outdir,
        mrg=self.mrg,
    )
    v_prediction = predict_rbm.get_outputs()
    predict_rbm_updates = predict_rbm.get_updates()

    # Average squared difference over axis 0, then over what remains.
    per_frame_mse = T.mean(T.sqr(v_sample[1:] - v_prediction), axis=0)
    monitors['mse'] = T.mean(per_frame_mse)

    # Merge the RBM updates into the training scan updates, in this order.
    updates_train.update(train_rbm_updates)
    updates_train.update(predict_rbm_updates)

    # Symbolic loop for sequence generation.
    (generated_v, generated_u), updates_generate = theano.scan(
        _generate_step,
        outputs_info=[None, self.generate_u0],
        n_steps=self.n_steps,
        name="rnnrbm_generate_scan",
    )

    # NOTE(review): the last returned item is taken from the generation
    # scan's recurrent states, not the training scan's.
    return (v_sample, cost, monitors, updates_train,
            generated_v, updates_generate, generated_u[-1])
def recurrence(self, v_t, u_tm1):
    """
    Perform one recurrent step of the model.

    Parameters
    ----------
    v_t : tensor or None
        The input (visible layer) at time t. Pass None to have the visible
        layer generated by an RBM instead.
    u_tm1 : tensor
        The recurrent hiddens from the previous timestep (t-1).

    Returns
    -------
    tuple
        When `v_t` is given: ``(u_t, bv_t, bh_t)``, the new recurrent state
        and the RBM visible and hidden biases for this step.
        When `v_t` is None: ``([v_t, u_t], updates)``, the generated visible
        layer with the new recurrent state, plus the RBM's updates.
    """
    # RBM biases for this timestep, conditioned on the previous state.
    bv_t = self.bv + T.dot(u_tm1, self.Wuv)
    bh_t = self.bh + T.dot(u_tm1, self.Wuh)

    def _next_state(visible):
        # Recurrent hidden update from the visible layer and previous state.
        return T.tanh(self.bu + T.dot(visible, self.Wvu) + T.dot(u_tm1, self.Wuu))

    # Training path: the visible layer was supplied by the caller.
    if v_t is not None:
        return _next_state(v_t), bv_t, bh_t

    # Generation path: obtain the visible layer from an RBM that uses the
    # biases computed above, starting from an all-zero input.
    sampler = RBM(
        inputs_hook=(self.input_size, T.zeros((self.input_size,))),
        params_hook=(self.W, bv_t, bh_t),
        visible_activation=self.visible_activation_func,
        hidden_activation=self.hidden_activation_func,
        k=self.k,
        outdir=os.path.join(self.outdir, 'rbm'),
        mrg=self.mrg,
    )
    generated_v = sampler.get_outputs()
    sampler_updates = sampler.get_updates()
    return [generated_v, _next_state(generated_v)], sampler_updates
def recurrence(self, v_t, u_tm1):
    """
    Compute a single timestep of the recurrent model.

    Parameters
    ----------
    v_t : tensor or None
        The visible layer at time t, or None to request generation.
    u_tm1 : tensor
        The previous timestep (t-1) recurrent hiddens.

    Returns
    -------
    tuple
        ``([v_t, u_t], updates)`` when generating (no `v_t` supplied), where
        `updates` are taken from the RBM that produced `v_t`; otherwise
        ``(u_t, bv_t, bh_t)``: the current recurrent state, visible bias and
        hidden bias.
    """
    generating = v_t is None

    # Current RBM bias parameters, derived from the previous recurrent state.
    bv_t = self.bv + T.dot(u_tm1, self.Wuv)
    bh_t = self.bh + T.dot(u_tm1, self.Wuh)

    rbm_updates = None
    if generating:
        # No observation available: let an RBM with these biases produce the
        # visible layer, starting from a zero vector.
        zero_input = T.zeros((self.input_size,))
        step_rbm = RBM(
            inputs_hook=(self.input_size, zero_input),
            params_hook=(self.W, bv_t, bh_t),
            visible_activation=self.visible_activation_func,
            hidden_activation=self.hidden_activation_func,
            k=self.k,
            outdir=os.path.join(self.outdir, 'rbm'),
            mrg=self.mrg,
        )
        v_t = step_rbm.get_outputs()
        rbm_updates = step_rbm.get_updates()

    # Update the recurrent hiddens from the (given or generated) visible
    # layer and the previous state.
    visible_term = T.dot(v_t, self.Wvu)
    recurrent_term = T.dot(u_tm1, self.Wuu)
    u_t = T.tanh(self.bu + visible_term + recurrent_term)

    if generating:
        return [v_t, u_t], rbm_updates
    return u_t, bv_t, bh_t
if __name__ == '__main__':
    # Demo: train an RBM on MNIST and pull out a few test images.
    # Logging is configured first so progress is reported through the logger
    # rather than print statements (optional, but recommended).
    import logging
    import opendeep.log.logger as logger

    logger.config_root_logger()
    log = logging.getLogger(__name__)
    log.info("Creating RBM!")

    # Grab the MNIST dataset.
    mnist = MNIST(concat_train_valid=False)

    # Seeded random sources for the model.
    rng = numpy.random.RandomState(1234)
    mrg = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(2**30))

    # Create the RBM: 28x28 visible units, 500 hidden units, k=15.
    n_visible = 28*28
    n_hidden = 500
    rbm = RBM(
        input_size=n_visible,
        hidden_size=n_hidden,
        k=15,
        weights_init='uniform',
        weights_interval=4*numpy.sqrt(6./(n_visible+n_hidden)),
        mrg=mrg,
    )
    # rbm.load_params('rbm_trained.pkl')

    # Make an optimizer to train it.
    # optimizer = SGD(model=rbm, dataset=mnist, batch_size=20, learning_rate=0.1, lr_decay=False, nesterov_momentum=False, momentum=False)
    optimizer_config = dict(
        lr_decay=False,
        learning_rate=0.1,
        model=rbm,
        dataset=mnist,
        batch_size=20,
        save_frequency=1,
    )
    optimizer = Optimizer(**optimizer_config)

    # Monitor channel built from the RBM's 'pseudo-log' monitor.
    ll = Monitor('pseudo-log', rbm.get_monitors()['pseudo-log'])

    # Perform training!
    optimizer.train(monitor_channels=ll)

    # Test it on some images: take the first 25 test inputs.
    test_data = mnist.getSubset(TEST)[0][:25].eval()
    # use the run function!
if __name__ == '__main__': # set up the logging environment to display outputs (optional) # although this is recommended over print statements everywhere import logging import opendeep.log.logger as logger logger.config_root_logger() log = logging.getLogger(__name__) log.info("Creating RBM!") # grab the MNIST dataset mnist = MNIST(concat_train_valid=False) # create the RBM rng = numpy.random.RandomState(1234) mrg = theano.tensor.shared_randomstreams.RandomStreams(rng.randint(2**30)) rbm = RBM(input_size=28*28, hidden_size=500, k=15, weights_init='uniform', weights_interval=4*numpy.sqrt(6./(28*28+500)), rng=rng) # rbm.load_params('rbm_trained.pkl') # make an optimizer to train it (AdaDelta is a good default) optimizer = SGD(model=rbm, dataset=mnist, n_epoch=15, batch_size=20, learning_rate=0.1, lr_decay=False, nesterov_momentum=False) # optimizer = AdaDelta(model=rbm, dataset=mnist, n_epoch=200, batch_size=100, learning_rate=1e-6) # perform training! optimizer.train() # test it on some images! test_data = mnist.getSubset(TEST)[0] test_data = test_data[:25].eval() # use the run function! preds = rbm.run(test_data) # Construct image from the test matrix image = Image.fromarray( tile_raster_images(