def test_gradients_auto_vs_manual(self):
    rng = np.random.RandomState(42)
    batch_size = 5
    input_size = 10

    model = iRBM(input_size=input_size, hidden_size=32, beta=1.01, CDk=1,
                 rng=np.random.RandomState(42))

    # Overwrite the model's parameters with known random values.
    W = rng.rand(model.hidden_size, model.input_size).astype(theano.config.floatX)
    model.W = theano.shared(value=W, name='W', borrow=True)
    b = rng.rand(model.hidden_size).astype(theano.config.floatX)
    model.b = theano.shared(value=b, name='b', borrow=True)
    c = rng.rand(model.input_size).astype(theano.config.floatX)
    model.c = theano.shared(value=c, name='c', borrow=True)

    params = [model.W, model.b, model.c]
    chain_start = T.matrix('start')
    chain_end = T.matrix('end')

    chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
    chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
    chain_start.tag.test_value = chain_start_value
    chain_end.tag.test_value = chain_end_value

    ### Computing gradients using automatic differentiation ###
    cost = T.mean(model.free_energy(chain_start)) - T.mean(model.free_energy(chain_end))
    gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

    ### Computing gradients manually ###
    h = RBM.sample_h_given_v(model, chain_start, return_probs=True)
    _h = RBM.sample_h_given_v(model, chain_end, return_probs=True)
    icdf = model.icdf_z_given_v(chain_start)
    _icdf = model.icdf_z_given_v(chain_end)

    if model.penalty == "softplus_bi":
        penalty = model.beta * T.nnet.sigmoid(model.b)
    elif model.penalty == "softplus0":  # Fixed: was `self.penalty`, but `self` is the test case.
        penalty = model.beta * T.nnet.sigmoid(0)

    grad_W = (T.dot(chain_end.T, _h*_icdf) - T.dot(chain_start.T, h*icdf)).T / batch_size
    grad_b = T.mean((_h-penalty)*_icdf - (h-penalty)*icdf, axis=0)
    grad_c = T.mean(chain_end - chain_start, axis=0)

    gparams_manual = [grad_W, grad_b, grad_c]
    grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

    for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
        param1 = gparam_auto.eval({chain_start: chain_start_value, chain_end: chain_end_value})
        param2 = gparam_manual.eval({chain_start: chain_start_value, chain_end: chain_end_value})
        assert_array_almost_equal(param1, param2, err_msg=gparam_manual.name,
                                  decimal=5)  # decimal=5 needed for float32
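# Why the manual expressions above take this form (a reading of the code, not a
# derivation copied from the paper): in the ordered/infinite RBMs each hidden
# unit j only contributes when z >= j, so its usual CD gradient term is weighted
# by icdf_j = P(z >= j | v), and the per-unit penalty (beta * sigmoid(b_j) for
# the "softplus_bi" variant) is subtracted from the hidden activation before
# that weighting:
#
#   dcost/dW[j, i] ~ mean_batch( h'_j * icdf'_j * v'_i - h_j * icdf_j * v_i )
#   dcost/db[j]    ~ mean_batch( (h'_j - penalty_j) * icdf'_j - (h_j - penalty_j) * icdf_j )
#   dcost/dc[i]    ~ mean_batch( v'_i - v_i )
#
# where primes denote the negative phase (chain_end). These are exactly grad_W,
# grad_b and grad_c above, which the test then checks against T.grad.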
def test_gradients_auto_vs_manual(self):
    rng = np.random.RandomState(42)
    batch_size = 5
    input_size = 10

    rbm = RBM(input_size=input_size, hidden_size=32, CDk=1,
              rng=np.random.RandomState(42))

    # Overwrite the model's parameters with known random binary values.
    W = (rng.rand(rbm.hidden_size, rbm.input_size) > 0.5).astype(theano.config.floatX)
    rbm.W = theano.shared(value=W, name='W', borrow=True)  # Fixed: was mistakenly named 'b'.
    b = (rng.rand(rbm.hidden_size) > 0.5).astype(theano.config.floatX)
    rbm.b = theano.shared(value=b, name='b', borrow=True)
    c = (rng.rand(rbm.input_size) > 0.5).astype(theano.config.floatX)
    rbm.c = theano.shared(value=c, name='c', borrow=True)

    params = [rbm.W, rbm.b, rbm.c]
    chain_start = T.matrix('start')
    chain_end = T.matrix('end')

    chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
    chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
    chain_start.tag.test_value = chain_start_value
    chain_end.tag.test_value = chain_end_value

    ### Computing gradients using automatic differentiation ###
    cost = T.mean(rbm.free_energy(chain_start)) - T.mean(rbm.free_energy(chain_end))
    gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

    ### Computing gradients manually ###
    h = rbm.sample_h_given_v(chain_start, return_probs=True)
    _h = rbm.sample_h_given_v(chain_end, return_probs=True)

    grad_W = (T.dot(chain_end.T, _h) - T.dot(chain_start.T, h)).T / batch_size
    grad_b = T.mean(_h - h, axis=0)
    grad_c = T.mean(chain_end - chain_start, axis=0)

    gparams_manual = [grad_W, grad_b, grad_c]
    grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

    for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
        param1 = gparam_auto.eval({chain_start: chain_start_value, chain_end: chain_end_value})
        param2 = gparam_manual.eval({chain_start: chain_start_value, chain_end: chain_end_value})
        assert_array_almost_equal(param1, param2, err_msg=gparam_manual.name)
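# Where the manual RBM expressions come from: for a binary RBM the free energy is
#
#   F(v) = -c^T v - sum_j softplus(b_j + W[j] . v)
#
# so dF/dW[j, i] = -sigmoid(b_j + W[j] . v) * v_i = -h_j * v_i,
# dF/db[j] = -h_j and dF/dc[i] = -v_i. Averaging F(chain_start) - F(chain_end)
# over the batch, with chain_end treated as a constant, yields exactly grad_W,
# grad_b and grad_c above.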
def test_gradients_auto_vs_manual(self):
    rng = np.random.RandomState(42)
    batch_size = 5
    input_size = 10

    model = oRBM(input_size=input_size, hidden_size=32, CDk=1,
                 rng=np.random.RandomState(42))

    # Overwrite the model's parameters with known random values.
    W = rng.rand(model.hidden_size, model.input_size).astype(theano.config.floatX)
    model.W = theano.shared(value=W, name='W', borrow=True)
    b = rng.rand(model.hidden_size).astype(theano.config.floatX)
    model.b = theano.shared(value=b, name='b', borrow=True)
    c = rng.rand(model.input_size).astype(theano.config.floatX)
    model.c = theano.shared(value=c, name='c', borrow=True)

    params = [model.W, model.b, model.c]
    chain_start = T.matrix('start')
    chain_end = T.matrix('end')

    chain_start_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
    chain_end_value = (rng.rand(batch_size, input_size) > 0.5).astype(theano.config.floatX)
    chain_start.tag.test_value = chain_start_value
    chain_end.tag.test_value = chain_end_value

    ### Computing gradients using automatic differentiation ###
    cost = T.mean(model.free_energy(chain_start)) - T.mean(model.free_energy(chain_end))
    gparams_auto = T.grad(cost, params, consider_constant=[chain_end])

    ### Computing gradients manually ###
    h = RBM.sample_h_given_v(model, chain_start, return_probs=True)
    _h = RBM.sample_h_given_v(model, chain_end, return_probs=True)
    icdf = model.icdf_z_given_v(chain_start)
    _icdf = model.icdf_z_given_v(chain_end)

    if model.penalty == "softplus_bi":
        penalty = model.beta * T.nnet.sigmoid(model.b)
    elif model.penalty == "softplus0":  # Fixed: was `self.penalty`, but `self` is the test case.
        penalty = model.beta * T.nnet.sigmoid(0)

    grad_W = (T.dot(chain_end.T, _h * _icdf) - T.dot(chain_start.T, h * icdf)).T / batch_size
    grad_b = T.mean((_h - penalty) * _icdf - (h - penalty) * icdf, axis=0)
    grad_c = T.mean(chain_end - chain_start, axis=0)

    gparams_manual = [grad_W, grad_b, grad_c]
    grad_W.name, grad_b.name, grad_c.name = "grad_W", "grad_b", "grad_c"

    for gparam_auto, gparam_manual in zip(gparams_auto, gparams_manual):
        param1 = gparam_auto.eval({chain_start: chain_start_value, chain_end: chain_end_value})
        param2 = gparam_manual.eval({chain_start: chain_start_value, chain_end: chain_end_value})
        assert_array_almost_equal(param1, param2, err_msg=gparam_manual.name)
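# A third, model-agnostic way to validate either gradient is a centered
# finite-difference check. The helper below is a sketch and not part of the
# test suite above; `compiled_cost` is assumed to be a theano.function([], cost)
# whose graph reads `param` (a Theano shared variable), with the chain matrices
# substituted by their test values. With float32 parameters a larger eps and a
# looser tolerance are usually needed; with float64, eps=1e-4 works well.
def finite_difference_grad(compiled_cost, param, eps=1e-4):
    base = param.get_value()
    grad = np.zeros_like(base)
    it = np.nditer(base, flags=['multi_index'])
    while not it.finished:
        idx = it.multi_index
        # Centered difference: (f(x + eps) - f(x - eps)) / (2 * eps).
        for sign in (1.0, -1.0):
            perturbed = base.copy()
            perturbed[idx] += sign * eps
            param.set_value(perturbed)
            grad[idx] += sign * compiled_cost() / (2 * eps)
        it.iternext()
    param.set_value(base)  # Restore the original parameter value.
    return grad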
def main():
    parser = buildArgsParser()
    args = parser.parse_args()

    # Check that at least one of --view or --save has been given.
    if not args.view and not args.save:
        parser.error("At least one of the following options must be chosen: --view or --save")

    # Get the experiment folder.
    experiment_path = args.name
    if not os.path.isdir(experiment_path):
        # If not a directory, it must be the name of the experiment.
        experiment_path = pjoin(".", "experiments", args.name)

    if not os.path.isdir(experiment_path):
        parser.error('Cannot find experiment: {0}!'.format(args.name))

    if not os.path.isfile(pjoin(experiment_path, "model.pkl")):
        parser.error('Cannot find model for experiment: {0}!'.format(experiment_path))

    if not os.path.isfile(pjoin(experiment_path, "hyperparams.json")):
        parser.error('Cannot find hyperparams for experiment: {0}!'.format(experiment_path))

    # Load the experiment's hyperparameters.
    hyperparams = utils.load_dict_from_json_file(pjoin(experiment_path, "hyperparams.json"))

    with Timer("Loading model"):
        if hyperparams["model"] == "rbm":
            from iRBM.models.rbm import RBM
            model_class = RBM
        elif hyperparams["model"] == "orbm":
            from iRBM.models.orbm import oRBM
            model_class = oRBM
        elif hyperparams["model"] == "irbm":
            from iRBM.models.irbm import iRBM
            model_class = iRBM

        # Load the actual model.
        model = model_class.load(pjoin(experiment_path, "model.pkl"))

    rng = np.random.RandomState(args.seed)

    # Start the chain from uniform noise.
    # TODO: sample from a Bernoulli distribution parametrized with the visible biases.
    chain_start = (rng.rand(args.nb_samples, model.input_size) > 0.5).astype(theano.config.floatX)

    with Timer("Building sampling function"):
        v0 = theano.shared(np.asarray(chain_start, dtype=theano.config.floatX))
        v1 = model.gibbs_step(v0)
        gibbs_step = theano.function([], updates={v0: v1})

    if args.full_gibbs_step:
        print("Using z=K")
        # Use z=K for the first Gibbs step.
        from iRBM.models.rbm import RBM
        h0 = RBM.sample_h_given_v(model, v0)
        v1 = RBM.sample_v_given_h(model, h0)
        v0.set_value(v1.eval())

    with Timer("Sampling"):
        for k in range(args.cdk):
            gibbs_step()

    samples = v0.get_value()

    if args.save:
        np.savez(args.out, samples)

    if args.view:
        if hyperparams["dataset"] == "binarized_mnist":
            image_shape = (28, 28)
        elif hyperparams["dataset"] == "caltech101_silhouettes28":
            image_shape = (28, 28)
        else:
            raise ValueError("Unknown dataset: {0}".format(hyperparams["dataset"]))

        data = vizu.concatenate_images(samples, shape=image_shape, border_size=1, clim=(0, 1))
        plt.imshow(data, cmap=plt.cm.gray, interpolation='nearest')
        plt.show()
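# Example usage (a sketch; the actual script name and flag spellings are
# defined by buildArgsParser, so treat these as assumptions):
#
#   python sample.py my_experiment --nb_samples 100 --cdk 1000 --save --out samples.npz
#
# Since np.savez above is called with a positional array, the samples are
# stored under NumPy's default key 'arr_0' and can be read back with:
#
#   samples = np.load("samples.npz")["arr_0"]  # shape: (nb_samples, input_size)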