def test_base_rate(self):
    """Check each base rate's ``compute_lnZ`` against exhaustive sums.

    For every base rate built from the model (uniform, visible biases
    ``'c'``, hidden biases ``'b'``), the value returned by ``compute_lnZ``
    is compared with three log-sum-exp estimates obtained from ``E``,
    ``free_energy`` and ``marginalize_over_v``.
    """
    # All binary combinations for the visible and the hidden units.
    all_visible = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
    all_hidden = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    candidates = [
        # Uniform base rate, i.e. all parameters of the model are set to 0.
        self.model.get_base_rate(),
        # Base rate where visible biases are the ones from the model.
        self.model.get_base_rate('c'),
        # Base rate where hidden biases are the ones from the model.
        self.model.get_base_rate('b'),
    ]

    for base_rate, _anneable_params in candidates:
        expected_lnZ = base_rate.compute_lnZ().eval().astype(config.floatX)

        def check(symbolic_lnZ):
            # Evaluate one symbolic estimate and compare it right away.
            estimate = symbolic_lnZ.eval()
            assert_almost_equal(estimate.astype(config.floatX), expected_lnZ, decimal=6)

        check(logsumexp(-base_rate.E(all_visible, all_hidden)))
        check(logsumexp(-base_rate.free_energy(all_visible), axis=0))
        check(logsumexp(-base_rate.marginalize_over_v(all_hidden)))
def test_compute_lnZ(self):
    """Compare lnZ derived from ``free_energy`` with a truncated sum over z.

    The reference value comes from a copy of ``self.model`` padded with many
    extra hidden units whose parameters are all zero.
    """
    all_visible = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)

    # We simulate having an infinite number of hidden units by adding a lot
    # of hidden units with parameters set to 0.
    nb_hidden_units_to_add = 10000
    padded = iRBM(input_size=self.model.input_size,
                  hidden_size=self.model.hidden_size + nb_hidden_units_to_add,
                  beta=self.model.beta.get_value())

    zero_weights = np.zeros((nb_hidden_units_to_add, padded.input_size), dtype=theano.config.floatX)
    zero_biases = np.zeros((nb_hidden_units_to_add,), dtype=theano.config.floatX)
    padded.W.set_value(np.r_[self.model.W.get_value(), zero_weights])
    padded.b.set_value(np.r_[self.model.b.get_value(), zero_biases])
    padded.c.set_value(self.model.c.get_value())

    v = T.matrix('v')
    z = T.iscalar('z')
    F_vz = theano.function([v, z], padded.F(v, z))

    # One evaluation of F(v, z) per candidate number of hidden units.
    energies = [F_vz(all_visible, nb_units)
                for nb_units in range(1, padded.hidden_size + 1)]
    expected_lnZ = logsumexp(-np.array(energies)).eval()

    lnZ_using_free_energy = theano.function([v], logsumexp(-self.model.free_energy(v)))
    # decimal=5 needed for float32
    assert_almost_equal(lnZ_using_free_energy(all_visible), expected_lnZ, decimal=5)
def test_free_energy(self):
    """Check ``F(v, z)`` and ``free_energy(v)`` against brute-force sums.

    First ``F(v, z)`` is compared with a log-sum-exp of ``E(v, h, z)`` over
    the enumerated hidden configurations. Then ``free_energy(v)`` is
    compared with a sum over z computed on a zero-padded copy of the model.
    """
    v = T.matrix('v')
    h = T.matrix('h')
    z = T.iscalar('z')
    logsumexp_E = theano.function([v, h, z], -logsumexp(-self.model.E(v, h, z)))
    F_vz = theano.function([v, z], self.model.F(v, z))

    rng = np.random.RandomState(42)
    v1 = (rng.rand(1, self.input_size) > 0.5).astype(config.floatX)
    all_hidden = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    # Check the free energy F(v, z) is correct.
    for nb_units in range(1, self.hidden_size + 1):
        stride = 2**(self.hidden_size - nb_units)
        hidden_subset = np.array(all_hidden[::stride])
        expected_F_vz = logsumexp_E(v1, hidden_subset, nb_units)
        assert_almost_equal(F_vz(v1, nb_units), expected_F_vz, decimal=6)

    # We now check that free energy F(v) assumes an infinite number of hidden units.
    # To do so, we create another model that has an infinite (read a lot)
    # number of hidden units with parameters set to 0.
    nb_hidden_units_to_add = 10000
    padded = iRBM(input_size=self.model.input_size,
                  hidden_size=self.model.hidden_size + nb_hidden_units_to_add,
                  beta=self.model.beta.get_value())

    zero_weights = np.zeros((nb_hidden_units_to_add, padded.input_size), dtype=theano.config.floatX)
    zero_biases = np.zeros((nb_hidden_units_to_add,), dtype=theano.config.floatX)
    padded.W.set_value(np.r_[self.model.W.get_value(), zero_weights])
    padded.b.set_value(np.r_[self.model.b.get_value(), zero_biases])
    padded.c.set_value(self.model.c.get_value())

    padded_F_vz = theano.function([v, z], padded.F(v, z))
    free_energies_vz = [padded_F_vz(v1, nb_units)
                        for nb_units in range(1, padded.hidden_size + 1)]
    Fv = -logsumexp(-np.array(free_energies_vz)).eval()

    free_energy = theano.function([v], self.model.free_energy(v))
    # decimal=5 needed for float32
    assert_array_almost_equal(free_energy(v1), [Fv], decimal=5)

    # The same input repeated batch_size times must give the same value per row.
    v2 = np.tile(v1, (self.batch_size, 1))
    assert_array_almost_equal(free_energy(v2), [Fv] * self.batch_size, decimal=5)
def test_base_rate(self):
    """Check each base rate's ``compute_lnZ`` against exhaustive sums.

    Three estimates are compared with ``compute_lnZ``: a brute-force sum
    over ``E(v, h, z)``, a sum over ``free_energy(v)``, and a sum over
    ``marginalize_over_v_z`` applied to NaN-padded hidden configurations.
    """
    # All binary combinations for V and H_z.
    V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
    H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    # Construct Hz, a subset of H, using NaN as padding.
    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
    Hz = []
    for z in range(1, self.hidden_size + 1):
        hz = np.array(H[::2**(self.hidden_size - z)])
        hz[:, z:] = np.nan
        Hz.extend(hz)

    Hz = np.array(Hz)
    assert_equal(len(Hz), np.sum(2**(np.arange(self.hidden_size) + 1)))
    assert_true(len(Hz) < self.hidden_size * 2**self.hidden_size)

    base_rates = []
    # Add the uniform base rate, i.e. all parameters of the model are set to 0.
    base_rates.append(self.model.get_base_rate())
    # Add the base rate where visible biases are the ones from the model.
    base_rates.append(self.model.get_base_rate('c'))
    # The base rate using the model's hidden biases ('b') is not implemented,
    # so it is not exercised here.

    for base_rate, _anneable_params in base_rates:
        base_rate_lnZ = base_rate.compute_lnZ().eval().astype(config.floatX)

        v = T.matrix('v')
        h = T.matrix('h')
        z = T.iscalar('z')
        lnZ = theano.function([v, h, z], logsumexp(-base_rate.E(v, h, z)))

        # One partial log-sum per number of hidden units, combined below.
        energies = []
        for nb_units in range(1, self.hidden_size + 1):
            hz = np.array(H[::2**(self.hidden_size - nb_units)])
            energies.append(lnZ(V, hz, nb_units))

        brute_force_lnZ = logsumexp(np.array(energies)).eval()
        assert_almost_equal(brute_force_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)

        theano_lnZ = logsumexp(-base_rate.free_energy(V), axis=0).eval()
        assert_almost_equal(theano_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)

        theano_lnZ = logsumexp(-base_rate.marginalize_over_v_z(Hz)).eval()
        assert_almost_equal(theano_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)
def test_base_rate(self):
    """Check each base rate's ``compute_lnZ`` against a zero-padded model.

    For every base rate, a larger iRBM is built whose extra hidden units
    have zero parameters; summing ``F(v, z)`` over all z of that model gives
    the reference lnZ. The estimate from ``free_energy`` is checked as well.
    """
    # All binary combinations for V.
    V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)

    # We simulate having an infinite number of hidden units by adding a lot
    # of hidden units with parameters set to 0.
    nb_hidden_units_to_add = 10000

    base_rates = []
    # Add the uniform base rate, i.e. all parameters of the model are set to 0.
    base_rates.append(self.model.get_base_rate())
    # Add the base rate where visible biases are the ones from the model.
    base_rates.append(self.model.get_base_rate('c'))
    # The base rate using the model's hidden biases ('b') is not implemented,
    # so it is not exercised here.

    for base_rate, _anneable_params in base_rates:
        # Function-call form prints the same text on Python 2 and 3.
        print(base_rate)
        base_rate_lnZ = base_rate.compute_lnZ().eval().astype(config.floatX)

        model = iRBM(input_size=base_rate.input_size,
                     hidden_size=base_rate.hidden_size + nb_hidden_units_to_add,
                     beta=base_rate.beta.get_value())

        # Parameters are replaced by symbolic joins of the base rate's
        # parameters with zero blocks.
        model.W = T.join(0, base_rate.W,
                         np.zeros((nb_hidden_units_to_add, model.input_size), dtype=theano.config.floatX))
        model.b = T.join(0, base_rate.b,
                         np.zeros((nb_hidden_units_to_add,), dtype=theano.config.floatX))
        model.c = base_rate.c

        v = T.matrix('v')
        z = T.iscalar('z')
        F_vz = theano.function([v, z], model.F(v, z))

        energies = [F_vz(V, nb_units) for nb_units in range(1, model.hidden_size + 1)]

        brute_force_lnZ = logsumexp(-np.array(energies)).eval()
        assert_almost_equal(brute_force_lnZ.astype(config.floatX), base_rate_lnZ, decimal=5)

        theano_lnZ = logsumexp(-base_rate.free_energy(V), axis=0).eval()
        assert_almost_equal(theano_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)
def test_compute_lnZ(self):
    """Check lnZ from ``free_energy`` and ``marginalize_over_v``.

    Both are compared with a brute-force log-sum-exp of ``-E(v, h)`` over
    all enumerated visible and hidden configurations.
    """
    v = T.matrix('v')
    h = T.matrix('h')
    lnZ = theano.function([v, h], logsumexp(-self.model.E(v, h)))

    V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
    H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    # The brute-force reference is the same for both checks; compute it once.
    expected_lnZ = lnZ(V, H)

    lnZ_using_free_energy = theano.function([v], logsumexp(-self.model.free_energy(v)))
    # Two different floating-point computations are compared, so use a
    # tolerance (same as the check below) instead of exact equality.
    assert_almost_equal(lnZ_using_free_energy(V), expected_lnZ, decimal=6)

    lnZ_using_marginalize_over_v = theano.function([h], logsumexp(-self.model.marginalize_over_v(h)))
    assert_almost_equal(lnZ_using_marginalize_over_v(H), expected_lnZ, decimal=6)
def test_verify_AIS(self):
    """Compare the AIS estimate of lnZ with a brute-force value (oRBM).

    The brute-force value sums the free energy over every binary visible
    configuration. The test also plots the running AIS estimate and checks
    that ``compute_AIS`` leaves the model parameters untouched.
    """
    import time

    model = oRBM(input_size=self.input_size,
                 hidden_size=self.hidden_size,
                 beta=self.beta)

    model.W.set_value(self.W)
    model.b.set_value(self.b)
    model.c.set_value(self.c)

    # Brute force
    print("Computing lnZ using brute force (i.e. summing the free energy of all posible $v$)...")
    V = theano.shared(value=cartesian([(0, 1)] * self.input_size, dtype=config.floatX))
    brute_force_lnZ = logsumexp(-model.free_energy(V), 0)
    f_brute_force_lnZ = theano.function([], brute_force_lnZ)

    # Snapshot used to verify that AIS does not modify the parameters.
    params_bak = [param.get_value() for param in model.parameters]

    print("Approximating lnZ using AIS...")
    start = time.time()

    # Create the directory before entering ``try`` so that ``finally`` never
    # refers to an unbound name if the creation itself fails.
    ais_working_dir = tempfile.mkdtemp()
    try:
        result = compute_AIS(model, M=self.nb_samples, betas=self.betas, seed=1234,
                             ais_working_dir=ais_working_dir, force=True)
        logcummean_Z = result['logcummean_Z']
        logcumstd_Z_down = result['logcumstd_Z_down']
        logcumstd_Z_up = result['logcumstd_Z_up']
        std_lnZ = result['std_lnZ']

        print("{0} sec".format(time.time() - start))

        # Evaluate the reference once and reuse it for the plot and checks.
        exact_lnZ = f_brute_force_lnZ()

        import pylab as plt
        sample_counts = range(1, self.nb_samples + 1)
        plt.gca().set_xmargin(0.1)
        plt.errorbar(sample_counts, logcummean_Z, yerr=[std_lnZ, std_lnZ], fmt='or')
        plt.errorbar(sample_counts, logcummean_Z, yerr=[logcumstd_Z_down, logcumstd_Z_up], fmt='ob')
        plt.plot([1, self.nb_samples], [exact_lnZ] * 2, '--g')
        plt.ticklabel_format(useOffset=False, axis='y')
        plt.show()
        AIS_logZ = logcummean_Z[-1]

        assert_array_equal(params_bak[0], model.W.get_value())
        assert_array_equal(params_bak[1], model.b.get_value())
        assert_array_equal(params_bak[2], model.c.get_value())

        print(np.abs(AIS_logZ - exact_lnZ))
        assert_almost_equal(AIS_logZ, exact_lnZ, decimal=2)
    finally:
        shutil.rmtree(ais_working_dir)
def test_beta(self):
    """Check that ln Z(v) of an all-zero iRBM converges as expected.

    With ``r = exp((1 - beta) * softplus(0))``, the log of the sum over z of
    ``exp(-F(v, z))`` is compared with ``ln(r / (1 - r))``, using just enough
    hidden units to be within ``eps`` of that limit.
    """
    beta = 1.1
    model = iRBM(input_size=self.input_size,
                 beta=beta)

    rng = np.random.RandomState(42)
    v1 = (rng.rand(1, self.input_size) > 0.5).astype(config.floatX)

    v = T.matrix('v')
    z = T.iscalar('z')
    F_vz = theano.function([v, z], model.F(v, z))

    # Suppose all parameters of the model have a value of 0 (i.e. l=0), then
    # as we add hidden units, $Z(v)=\sum_z exp(-F(v, z))$ should converge to
    # the sum of a shifted geometric series, whose log is computed below.
    geometric_ratio = T.exp((1. - model.beta) * T.nnet.softplus(0.)).eval()
    log_shifted_geometric_convergence = np.float32(np.log(geometric_ratio / (1. - geometric_ratio)))
    Zv_theorical_convergence = log_shifted_geometric_convergence

    # In fact, we can estimate the number of hidden units needed to be at
    # $\epsilon$ of the convergence point.
    eps = 1e-7
    hidden_size = (np.log(eps) + np.log(1 - geometric_ratio)) / np.log(geometric_ratio)
    hidden_size = int(np.ceil(hidden_size))

    model.hidden_size = hidden_size
    model.W.set_value(np.zeros((model.hidden_size, model.input_size), dtype=theano.config.floatX))
    model.b.set_value(np.zeros((model.hidden_size,), dtype=theano.config.floatX))

    free_energies = [F_vz(v1, nb_units) for nb_units in range(1, model.hidden_size + 1)]

    Z_v = logsumexp(-np.array(free_energies)).eval()
    # Space-joined str() values reproduce the old ``print a, b, ...`` output
    # while remaining valid syntax on Python 3.
    report = (hidden_size, ':', Z_v, Zv_theorical_convergence, abs(Zv_theorical_convergence - Z_v))
    print(" ".join(str(item) for item in report))
    assert_almost_equal(Z_v, Zv_theorical_convergence, decimal=6)
def test_free_energy(self):
    """Validate ``F(v, z)`` and ``free_energy(v)`` by brute force.

    ``F(v, z)`` is checked against a log-sum-exp of ``E(v, h, z)`` over the
    enumerated hidden configurations; ``free_energy(v)`` is checked against
    a sum over z evaluated on a copy of the model padded with zero units.
    """
    v = T.matrix('v')
    h = T.matrix('h')
    z = T.iscalar('z')
    logsumexp_E = theano.function([v, h, z], -logsumexp(-self.model.E(v, h, z)))
    F_vz = theano.function([v, z], self.model.F(v, z))

    rng = np.random.RandomState(42)
    v1 = (rng.rand(1, self.input_size) > 0.5).astype(config.floatX)
    H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    # Check the free energy F(v, z) is correct.
    for k in range(1, self.hidden_size + 1):
        h_subset = np.array(H[::2**(self.hidden_size - k)])
        reference = logsumexp_E(v1, h_subset, k)
        assert_almost_equal(F_vz(v1, k), reference, decimal=6)

    # We now check that free energy F(v) assumes an infinite number of hidden units.
    # To do so, we create another model that has an infinite (read a lot)
    # number of hidden units with parameters set to 0.
    nb_hidden_units_to_add = 10000
    big_model = iRBM(input_size=self.model.input_size,
                     hidden_size=self.model.hidden_size + nb_hidden_units_to_add,
                     beta=self.model.beta.get_value())

    extra_W = np.zeros((nb_hidden_units_to_add, big_model.input_size), dtype=theano.config.floatX)
    extra_b = np.zeros((nb_hidden_units_to_add,), dtype=theano.config.floatX)
    big_model.W.set_value(np.r_[self.model.W.get_value(), extra_W])
    big_model.b.set_value(np.r_[self.model.b.get_value(), extra_b])
    big_model.c.set_value(self.model.c.get_value())

    big_F_vz = theano.function([v, z], big_model.F(v, z))
    free_energies_vz = [big_F_vz(v1, k) for k in range(1, big_model.hidden_size + 1)]
    Fv = -logsumexp(-np.array(free_energies_vz)).eval()

    free_energy = theano.function([v], self.model.free_energy(v))
    # decimal=5 needed for float32
    assert_array_almost_equal(free_energy(v1), [Fv], decimal=5)

    # Batched call on identical rows: every row must equal Fv.
    v2 = np.tile(v1, (self.batch_size, 1))
    assert_array_almost_equal(free_energy(v2), [Fv] * self.batch_size, decimal=5)
def test_base_rate(self):
    """Check each base rate's ``compute_lnZ`` against exhaustive sums.

    ``compute_lnZ`` is compared with a brute-force sum over ``E(v, h, z)``,
    a sum over ``free_energy(v)`` and a sum over ``marginalize_over_v_z``
    applied to NaN-padded hidden configurations.
    """
    # All binary combinations for V and H_z.
    V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
    H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    # Construct Hz, a subset of H, using NaN as padding.
    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
    Hz = []
    for z in range(1, self.hidden_size + 1):
        hz = np.array(H[::2**(self.hidden_size - z)])
        hz[:, z:] = np.nan
        Hz.extend(hz)

    Hz = np.array(Hz)
    assert_equal(len(Hz), np.sum(2**(np.arange(self.hidden_size) + 1)))
    assert_true(len(Hz) < self.hidden_size * 2**self.hidden_size)

    base_rates = []
    # Add the uniform base rate, i.e. all parameters of the model are set to 0.
    base_rates.append(self.model.get_base_rate())
    # Add the base rate where visible biases are the ones from the model.
    base_rates.append(self.model.get_base_rate('c'))
    # The base rate using the model's hidden biases ('b') is not implemented,
    # so it is not exercised here.

    for base_rate, _anneable_params in base_rates:
        base_rate_lnZ = base_rate.compute_lnZ().eval().astype(config.floatX)

        v = T.matrix('v')
        h = T.matrix('h')
        z = T.iscalar('z')
        lnZ = theano.function([v, h, z], logsumexp(-base_rate.E(v, h, z)))

        # One partial log-sum per number of hidden units, combined below.
        energies = []
        for nb_units in range(1, self.hidden_size + 1):
            hz = np.array(H[::2**(self.hidden_size - nb_units)])
            energies.append(lnZ(V, hz, nb_units))

        brute_force_lnZ = logsumexp(np.array(energies)).eval()
        assert_almost_equal(brute_force_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)

        theano_lnZ = logsumexp(-base_rate.free_energy(V), axis=0).eval()
        assert_almost_equal(theano_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)

        theano_lnZ = logsumexp(-base_rate.marginalize_over_v_z(Hz)).eval()
        assert_almost_equal(theano_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)
def test_base_rate(self):
    """Check each base rate's ``compute_lnZ`` against a zero-padded model.

    For every base rate, a larger iRBM is built whose extra hidden units
    have zero parameters; summing ``F(v, z)`` over all z of that model gives
    the reference lnZ. The estimate from ``free_energy`` is checked as well.
    """
    # All binary combinations for V.
    V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)

    # We simulate having an infinite number of hidden units by adding a lot
    # of hidden units with parameters set to 0.
    nb_hidden_units_to_add = 10000

    base_rates = []
    # Add the uniform base rate, i.e. all parameters of the model are set to 0.
    base_rates.append(self.model.get_base_rate())
    # Add the base rate where visible biases are the ones from the model.
    base_rates.append(self.model.get_base_rate('c'))
    # The base rate using the model's hidden biases ('b') is not implemented,
    # so it is not exercised here.

    for base_rate, _anneable_params in base_rates:
        # Function-call form prints the same text on Python 2 and 3.
        print(base_rate)
        base_rate_lnZ = base_rate.compute_lnZ().eval().astype(config.floatX)

        model = iRBM(input_size=base_rate.input_size,
                     hidden_size=base_rate.hidden_size + nb_hidden_units_to_add,
                     beta=base_rate.beta.get_value())

        # Parameters are replaced by symbolic joins of the base rate's
        # parameters with zero blocks.
        model.W = T.join(0, base_rate.W,
                         np.zeros((nb_hidden_units_to_add, model.input_size), dtype=theano.config.floatX))
        model.b = T.join(0, base_rate.b,
                         np.zeros((nb_hidden_units_to_add,), dtype=theano.config.floatX))
        model.c = base_rate.c

        v = T.matrix('v')
        z = T.iscalar('z')
        F_vz = theano.function([v, z], model.F(v, z))

        energies = [F_vz(V, nb_units) for nb_units in range(1, model.hidden_size + 1)]

        brute_force_lnZ = logsumexp(-np.array(energies)).eval()
        assert_almost_equal(brute_force_lnZ.astype(config.floatX), base_rate_lnZ, decimal=5)

        theano_lnZ = logsumexp(-base_rate.free_energy(V), axis=0).eval()
        assert_almost_equal(theano_lnZ.astype(config.floatX), base_rate_lnZ, decimal=6)
def test_free_energy(self):
    """Check ``free_energy(v)`` against a brute-force sum over h and z.

    The reference combines, for each number of hidden units z, the
    log-sum-exp of ``-E(v, h, z)`` over the enumerated hidden configurations.
    """
    v = T.matrix('v')
    h = T.matrix('h')
    z = T.iscalar('z')
    logsumexp_E = theano.function([v, h, z], -logsumexp(-self.model.E(v, h, z)))

    # Seeded generator so that a failure can be reproduced exactly.
    rng = np.random.RandomState(42)
    v1 = rng.rand(1, self.input_size).astype(config.floatX)
    H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    energies = []
    for nb_units in range(1, self.hidden_size + 1):
        hidden_subset = np.array(H[::2**(self.hidden_size - nb_units)])
        energies.append(logsumexp_E(v1, hidden_subset, nb_units))

    Fv = -logsumexp(-np.array(energies)).eval()

    free_energy = theano.function([v], self.model.free_energy(v))
    assert_array_almost_equal(free_energy(v1), [Fv])

    # The same input repeated batch_size times must give the same value per row.
    v2 = np.tile(v1, (self.batch_size, 1))
    assert_array_almost_equal(free_energy(v2), [Fv] * self.batch_size)
def test_compute_lnZ(self):
    """Check lnZ obtained from ``free_energy`` against a sum over z.

    The reference is computed on a copy of ``self.model`` extended with a
    large number of hidden units whose parameters are zero.
    """
    V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)

    # We simulate having an infinite number of hidden units by adding a lot
    # of hidden units with parameters set to 0.
    nb_hidden_units_to_add = 10000
    big_model = iRBM(input_size=self.model.input_size,
                     hidden_size=self.model.hidden_size + nb_hidden_units_to_add,
                     beta=self.model.beta.get_value())

    extra_W = np.zeros((nb_hidden_units_to_add, big_model.input_size), dtype=theano.config.floatX)
    extra_b = np.zeros((nb_hidden_units_to_add,), dtype=theano.config.floatX)
    big_model.W.set_value(np.r_[self.model.W.get_value(), extra_W])
    big_model.b.set_value(np.r_[self.model.b.get_value(), extra_b])
    big_model.c.set_value(self.model.c.get_value())

    v = T.matrix('v')
    z = T.iscalar('z')
    F_vz = theano.function([v, z], big_model.F(v, z))

    # F(V, k) for every k from 1 up to the padded hidden size.
    energies = [F_vz(V, k) for k in range(1, big_model.hidden_size + 1)]
    reference_lnZ = logsumexp(-np.array(energies)).eval()

    lnZ_using_free_energy = theano.function([v], logsumexp(-self.model.free_energy(v)))
    # decimal=5 needed for float32
    assert_almost_equal(lnZ_using_free_energy(V), reference_lnZ, decimal=5)
def test_sample_z_given_v(self):
    """Check ``icdf_z_given_v`` and ``sample_zmask_given_v`` of the model.

    The expected inverse CDF over z is computed on a zero-padded copy of the
    model and truncated to the model's own hidden size. Sampled masks are
    then compared with it by Monte Carlo frequency.
    """
    # Seeded generator so that a failure can be reproduced exactly.
    rng = np.random.RandomState(42)
    v1 = rng.rand(1, self.input_size).astype(config.floatX)

    # We simulate having an infinite number of hidden units by adding a lot
    # of hidden units with parameters set to 0.
    nb_hidden_units_to_add = 10000
    model = iRBM(input_size=self.model.input_size,
                 hidden_size=self.model.hidden_size + nb_hidden_units_to_add,
                 beta=self.model.beta.get_value())

    model.W.set_value(np.r_[self.model.W.get_value(),
                            np.zeros((nb_hidden_units_to_add, model.input_size), dtype=theano.config.floatX)])
    model.b.set_value(np.r_[self.model.b.get_value(),
                            np.zeros((nb_hidden_units_to_add,), dtype=theano.config.floatX)])
    model.c.set_value(self.model.c.get_value())

    v = T.matrix('v')
    z = T.iscalar('z')
    F_vz = theano.function([v, z], model.F(v, z))

    energies = [F_vz(v1, nb_units) for nb_units in range(1, model.hidden_size + 1)]
    energies = np.array(energies).T

    # Normalise exp(-F(v, z)) over z, then accumulate from the right.
    neg_log_probs = energies - -logsumexp(-energies, axis=1).eval()
    probs = np.exp(-neg_log_probs)
    expected_icdf = np.cumsum(probs[:, ::-1], axis=1)[:, ::-1]
    expected_icdf = expected_icdf[:, :self.model.hidden_size]

    # Test inverse cdf
    icdf_z_given_v = theano.function([v], self.model.icdf_z_given_v(v))
    # decimal=5 needed for float32
    assert_array_almost_equal(icdf_z_given_v(v1), expected_icdf, decimal=5)

    batch_size = 500000
    self.model.batch_size = batch_size
    sample_zmask_given_v = theano.function([v], self.model.sample_zmask_given_v(v))
    v2 = np.tile(v1, (self.model.batch_size, 1))
    z_mask = sample_zmask_given_v(v2)

    # First hidden units should always be considered i.e. z_mask[:, 0] == 1
    assert_equal(np.sum(z_mask[:, 0] == 0, axis=0), 0)

    # Test that sampled masks are as expected i.e. equal expected_icdf.
    # float() guards against floor division on Python 2 should the mask
    # have an integer dtype.
    freq_per_z = np.sum(z_mask, axis=0) / float(self.model.batch_size)
    assert_array_almost_equal(freq_per_z, expected_icdf[0], decimal=3,
                              err_msg="Tested using MC sampling, rerun it to be certain that is an error or increase 'batch_size'.")
def test_free_energy(self):
    """Check ``free_energy(v)`` against a brute-force sum over h and z.

    The reference combines, for each number of hidden units z, the
    log-sum-exp of ``-E(v, h, z)`` over the enumerated hidden configurations.
    """
    v = T.matrix('v')
    h = T.matrix('h')
    z = T.iscalar('z')
    logsumexp_E = theano.function([v, h, z], -logsumexp(-self.model.E(v, h, z)))

    # Seeded generator so that a failure can be reproduced exactly.
    rng = np.random.RandomState(42)
    v1 = rng.rand(1, self.input_size).astype(config.floatX)
    H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    energies = []
    for nb_units in range(1, self.hidden_size + 1):
        hidden_subset = np.array(H[::2**(self.hidden_size - nb_units)])
        energies.append(logsumexp_E(v1, hidden_subset, nb_units))

    Fv = -logsumexp(-np.array(energies)).eval()

    free_energy = theano.function([v], self.model.free_energy(v))
    assert_array_almost_equal(free_energy(v1), [Fv])

    # The same input repeated batch_size times must give the same value per row.
    v2 = np.tile(v1, (self.batch_size, 1))
    assert_array_almost_equal(free_energy(v2), [Fv] * self.batch_size)
def test_free_energy(self):
    """Check ``free_energy(v)`` against marginalisation over all h.

    The reference is the negative log-sum-exp of ``-E(v, h)`` over every
    enumerated hidden configuration.
    """
    v = T.matrix('v')
    h = T.matrix('h')
    logsumexp_E = theano.function([v, h], -logsumexp(-self.model.E(v, h)))

    # Seeded generator so that a failure can be reproduced exactly.
    rng = np.random.RandomState(42)
    v1 = rng.rand(1, self.input_size).astype(config.floatX)
    H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)
    Fv = logsumexp_E(v1, H)  # Marginalization over $\bh$

    free_energy = theano.function([v], self.model.free_energy(v))
    assert_array_almost_equal(free_energy(v1), [Fv])

    # The same input repeated batch_size times must give the same value per row.
    v2 = np.tile(v1, (self.batch_size, 1))
    assert_array_almost_equal(free_energy(v2), [Fv] * self.batch_size)
def test_marginalize_over_v(self):
    """Check ``marginalize_over_v(h)`` against a brute-force sum over v.

    The reference is the negative log-sum-exp of ``-E(v, h)`` over every
    enumerated visible configuration.
    """
    v = T.matrix('v')
    h = T.matrix('h')
    E = theano.function([v, h], -logsumexp(-self.model.E(v, h)))

    # Seeded generator so that a failure can be reproduced exactly.
    rng = np.random.RandomState(42)
    h1 = rng.rand(1, self.hidden_size).astype(config.floatX)
    V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
    expected_energy = E(V, h1)

    marginalize_over_v = theano.function([h], self.model.marginalize_over_v(h))
    assert_array_almost_equal(marginalize_over_v(h1), [expected_energy])

    # The same input repeated batch_size times must give the same value per row.
    h2 = np.tile(h1, (self.batch_size, 1))
    assert_array_almost_equal(marginalize_over_v(h2), [expected_energy] * self.batch_size)
def test_base_rate(self):
    """Verify ``compute_lnZ`` of every base rate by exhaustive enumeration.

    Each base rate (uniform, visible biases ``'c'``, hidden biases ``'b'``)
    is checked against log-sum-exp estimates built from ``E``,
    ``free_energy`` and ``marginalize_over_v``.
    """
    # All binary combinations for V and H.
    visible_states = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
    hidden_states = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    base_rates = [
        # Uniform base rate, i.e. all parameters of the model are set to 0.
        self.model.get_base_rate(),
        # Base rate where visible biases are the ones from the model.
        self.model.get_base_rate('c'),
        # Base rate where hidden biases are the ones from the model.
        self.model.get_base_rate('b'),
    ]

    for base_rate, _anneable_params in base_rates:
        target = base_rate.compute_lnZ().eval().astype(config.floatX)

        # Joint enumeration over (v, h).
        from_energy = logsumexp(-base_rate.E(visible_states, hidden_states)).eval()
        assert_almost_equal(from_energy.astype(config.floatX), target, decimal=6)

        # Hidden units marginalised analytically.
        from_free_energy = logsumexp(-base_rate.free_energy(visible_states), axis=0).eval()
        assert_almost_equal(from_free_energy.astype(config.floatX), target, decimal=6)

        # Visible units marginalised analytically.
        from_marginal_v = logsumexp(-base_rate.marginalize_over_v(hidden_states)).eval()
        assert_almost_equal(from_marginal_v.astype(config.floatX), target, decimal=6)
def test_free_energy(self):
    """Check ``free_energy(v)`` against marginalisation over all h.

    The reference is the negative log-sum-exp of ``-E(v, h)`` over every
    enumerated hidden configuration.
    """
    v = T.matrix('v')
    h = T.matrix('h')
    logsumexp_E = theano.function([v, h], -logsumexp(-self.model.E(v, h)))

    # Seeded generator so that a failure can be reproduced exactly.
    rng = np.random.RandomState(42)
    v1 = rng.rand(1, self.input_size).astype(config.floatX)
    H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)
    Fv = logsumexp_E(v1, H)  # Marginalization over $\bh$

    free_energy = theano.function([v], self.model.free_energy(v))
    assert_array_almost_equal(free_energy(v1), [Fv])

    # The same input repeated batch_size times must give the same value per row.
    v2 = np.tile(v1, (self.batch_size, 1))
    assert_array_almost_equal(free_energy(v2), [Fv] * self.batch_size)
def test_verify_AIS(self):
    """Compare the AIS estimate of lnZ with a brute-force value (iRBM).

    The brute-force value sums the free energy over every binary visible
    configuration. The test also plots the running AIS estimate and checks
    that ``compute_AIS`` leaves the model parameters untouched.
    """
    import time

    model = iRBM(input_size=self.input_size,
                 hidden_size=self.hidden_size,
                 beta=self.beta)

    model.W.set_value(self.W)
    model.b.set_value(self.b)
    model.c.set_value(self.c)

    # Brute force
    print("Computing lnZ using brute force (i.e. summing the free energy of all posible $v$)...")
    V = theano.shared(value=cartesian([(0, 1)] * self.input_size, dtype=config.floatX))
    brute_force_lnZ = logsumexp(-model.free_energy(V), 0)
    f_brute_force_lnZ = theano.function([], brute_force_lnZ)

    # Snapshot used to verify that AIS does not modify the parameters.
    params_bak = [param.get_value() for param in model.parameters]

    print("Approximating lnZ using AIS...")
    start = time.time()

    # Create the directory before entering ``try`` so that ``finally`` never
    # refers to an unbound name if the creation itself fails.
    ais_working_dir = tempfile.mkdtemp()
    try:
        result = compute_AIS(model, M=self.nb_samples, betas=self.betas, seed=1234,
                             ais_working_dir=ais_working_dir, force=True)
        logcummean_Z = result['logcummean_Z']
        logcumstd_Z_down = result['logcumstd_Z_down']
        logcumstd_Z_up = result['logcumstd_Z_up']
        std_lnZ = result['std_lnZ']

        print("{0} sec".format(time.time() - start))

        # Evaluate the reference once and reuse it for the plot and checks.
        exact_lnZ = f_brute_force_lnZ()

        import pylab as plt
        sample_counts = range(1, self.nb_samples + 1)
        plt.gca().set_xmargin(0.1)
        plt.errorbar(sample_counts, logcummean_Z, yerr=[std_lnZ, std_lnZ], fmt='or')
        plt.errorbar(sample_counts, logcummean_Z, yerr=[logcumstd_Z_down, logcumstd_Z_up], fmt='ob')
        plt.plot([1, self.nb_samples], [exact_lnZ] * 2, '--g')
        plt.ticklabel_format(useOffset=False, axis='y')
        plt.show()
        AIS_logZ = logcummean_Z[-1]

        assert_array_equal(params_bak[0], model.W.get_value())
        assert_array_equal(params_bak[1], model.b.get_value())
        assert_array_equal(params_bak[2], model.c.get_value())

        print(np.abs(AIS_logZ - exact_lnZ))
        assert_almost_equal(AIS_logZ, exact_lnZ, decimal=2)
    finally:
        shutil.rmtree(ais_working_dir)
def test_sample_z_given_v(self):
    """Check ``icdf_z_given_v`` and ``sample_zmask_given_v`` of the model.

    The expected inverse CDF over z is built from a softmax over the
    per-z log-sum-exp of ``-E(v, h, z)``. Sampled masks are then compared
    with it by Monte Carlo frequency.
    """
    v = T.matrix('v')
    h = T.matrix('h')
    z = T.iscalar('z')
    E = theano.function([v, h, z], logsumexp(-self.model.E(v, h, z)))

    # Seeded generator so that a failure can be reproduced exactly.
    rng = np.random.RandomState(42)
    v1 = rng.rand(1, self.input_size).astype(config.floatX)
    H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    energies = []
    for nb_units in range(1, self.hidden_size + 1):
        hidden_subset = np.array(H[::2**(self.hidden_size - nb_units)])
        energies.append(E(v1, hidden_subset, nb_units))

    probs = T.nnet.softmax(T.stack(energies))
    # Reverse, accumulate, reverse back: a cumulative sum from the right.
    expected_icdf = T.cumsum(probs[:, ::-1], axis=1)[:, ::-1].eval()

    # Test inverse cdf
    icdf_z_given_v = theano.function([v], self.model.icdf_z_given_v(v))
    assert_array_almost_equal(icdf_z_given_v(v1), expected_icdf)

    batch_size = 500000
    self.model.batch_size = batch_size
    sample_zmask_given_v = theano.function([v], self.model.sample_zmask_given_v(v))
    v2 = np.tile(v1, (self.model.batch_size, 1))
    z_mask = sample_zmask_given_v(v2)

    # First hidden units should always be considered i.e. z_mask[:, 0] == 1
    assert_equal(np.sum(z_mask[:, 0] == 0, axis=0), 0)

    # Test that sampled masks are as expected i.e. equal expected_icdf.
    # float() guards against floor division on Python 2 should the mask
    # have an integer dtype.
    freq_per_z = np.sum(z_mask, axis=0) / float(self.model.batch_size)
    assert_array_almost_equal(freq_per_z, expected_icdf[0], decimal=3,
                              err_msg="Tested using MC sampling, rerun it to be certain that is an error or increase 'batch_size'.")
def test_beta(self):
    """Check that ln Z(v) of an all-zero iRBM converges as expected.

    With ``r = exp((1 - beta) * softplus(0))``, the log of the sum over z of
    ``exp(-F(v, z))`` is compared with ``ln(r / (1 - r))``, using just enough
    hidden units to be within ``eps`` of that limit.
    """
    beta = 1.1
    model = iRBM(input_size=self.input_size,
                 beta=beta)

    rng = np.random.RandomState(42)
    v1 = (rng.rand(1, self.input_size) > 0.5).astype(config.floatX)

    v = T.matrix('v')
    z = T.iscalar('z')
    F_vz = theano.function([v, z], model.F(v, z))

    # Suppose all parameters of the model have a value of 0 (i.e. l=0), then
    # as we add hidden units, $Z(v)=\sum_z exp(-F(v, z))$ should converge to
    # the sum of a shifted geometric series, whose log is computed below.
    geometric_ratio = T.exp((1. - model.beta) * T.nnet.softplus(0.)).eval()
    log_shifted_geometric_convergence = np.float32(np.log(geometric_ratio / (1. - geometric_ratio)))
    Zv_theorical_convergence = log_shifted_geometric_convergence

    # In fact, we can estimate the number of hidden units needed to be at
    # $\epsilon$ of the convergence point.
    eps = 1e-7
    hidden_size = (np.log(eps) + np.log(1 - geometric_ratio)) / np.log(geometric_ratio)
    hidden_size = int(np.ceil(hidden_size))

    model.hidden_size = hidden_size
    model.W.set_value(np.zeros((model.hidden_size, model.input_size), dtype=theano.config.floatX))
    model.b.set_value(np.zeros((model.hidden_size,), dtype=theano.config.floatX))

    free_energies = [F_vz(v1, nb_units) for nb_units in range(1, model.hidden_size + 1)]

    Z_v = logsumexp(-np.array(free_energies)).eval()
    # Space-joined str() values reproduce the old ``print a, b, ...`` output
    # while remaining valid syntax on Python 3.
    report = (hidden_size, ':', Z_v, Zv_theorical_convergence, abs(Zv_theorical_convergence - Z_v))
    print(" ".join(str(item) for item in report))
    assert_almost_equal(Z_v, Zv_theorical_convergence, decimal=6)
def pdf_z_given_v(self, v, method="softmax"):
    """Return the probabilities over z given ``v``.

    Parameters
    ----------
    v : symbolic input passed to ``self.log_z_given_v``.
    method : str
        ``"softmax"`` normalises ``log_z_given_v(v)`` with a softmax.
        ``"infinite"`` first adds a geometric-series correction term to the
        last column, then normalises with a log-sum-exp over axis 1.

    Raises
    ------
    ValueError
        If ``method`` is neither ``"softmax"`` nor ``"infinite"``.
    """
    # Validate up front: an unknown method used to fall through both
    # branches and fail with an UnboundLocalError on the return statement.
    if method not in ("softmax", "infinite"):
        raise ValueError(
            "Unknown method '{0}'; expected 'softmax' or 'infinite'.".format(method))

    log_z_given_v = self.log_z_given_v(v)

    if method == "softmax":
        # If 2D, softmax is performed along axis=1.
        return T.nnet.softmax(log_z_given_v)

    # method == "infinite"
    geometric_ratio = T.exp((1.0 - self.beta) * T.nnet.softplus(0.0)).eval()
    log_shifted_geometric_convergence = np.float32(np.log(geometric_ratio / (1.0 - geometric_ratio)))

    ### Use this trick until theano.multinomial is fixed. ###
    # We add the remaining of the geometric series in the last bucket.
    log_z_given_v = T.set_subtensor(
        log_z_given_v[:, -1],
        log_z_given_v[:, -1] + log_shifted_geometric_convergence
    )
    log_sum_z_given_v = logsumexp(log_z_given_v, axis=1)
    ### END ###

    return T.exp(log_z_given_v - log_sum_z_given_v[:, None])
def test_compute_lnZ(self):
    """Check three ways of obtaining lnZ against a brute-force reference.

    The reference combines, for each number of hidden units z, the
    log-sum-exp of ``-E(v, h, z)``. It is compared with estimates derived
    from ``free_energy``, ``marginalize_over_v`` and
    ``marginalize_over_v_z`` (the latter fed NaN-padded configurations).
    """
    v = T.matrix('v')
    h = T.matrix('h')
    z = T.iscalar('z')
    lnZ = theano.function([v, h, z], logsumexp(-self.model.E(v, h, z)))

    V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
    H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    energies = []
    for nb_units in range(1, self.hidden_size + 1):
        hz = np.array(H[::2**(self.hidden_size - nb_units)])
        energies.append(lnZ(V, hz, nb_units))

    # From here on ``lnZ`` holds the reference value, not the function.
    lnZ = logsumexp(np.array(energies)).eval()

    lnZ_using_free_energy = theano.function([v], logsumexp(-self.model.free_energy(v)))
    assert_almost_equal(lnZ_using_free_energy(V), lnZ, decimal=6)

    h = T.matrix('h')
    z = T.iscalar('z')
    lnZ_using_marginalize_over_v = theano.function([h, z], logsumexp(self.model.marginalize_over_v(h, z)))

    energies = []
    for nb_units in range(1, self.hidden_size + 1):
        hz = np.array(H[::2**(self.hidden_size - nb_units)])
        energies.append(lnZ_using_marginalize_over_v(hz, nb_units))

    assert_almost_equal(logsumexp(np.array(energies)).eval(), lnZ, decimal=6)

    # Construct Hz, a subset of H, using NaN as padding.
    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
    Hz = []
    for nb_units in range(1, self.hidden_size + 1):
        hz = np.array(H[::2**(self.hidden_size - nb_units)])
        hz[:, nb_units:] = np.nan
        Hz.extend(hz)

    Hz = np.array(Hz)
    assert_equal(len(Hz), np.sum(2**(np.arange(self.hidden_size) + 1)))
    assert_true(len(Hz) < self.hidden_size * 2**self.hidden_size)

    lnZ_using_marginalize_over_v_z = theano.function([h], logsumexp(-self.model.marginalize_over_v_z(h)))
    assert_almost_equal(lnZ_using_marginalize_over_v_z(Hz), lnZ, decimal=6)
def test_sample_z_given_v(self):
    """Check ``icdf_z_given_v`` and ``sample_zmask_given_v`` of the model.

    The expected inverse CDF over z is built from a softmax over the
    per-z log-sum-exp of ``-E(v, h, z)``. Sampled masks are then compared
    with it by Monte Carlo frequency.
    """
    v = T.matrix('v')
    h = T.matrix('h')
    z = T.iscalar('z')
    E = theano.function([v, h, z], logsumexp(-self.model.E(v, h, z)))

    # Seeded generator so that a failure can be reproduced exactly.
    rng = np.random.RandomState(42)
    v1 = rng.rand(1, self.input_size).astype(config.floatX)
    H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    energies = []
    for nb_units in range(1, self.hidden_size + 1):
        hidden_subset = np.array(H[::2**(self.hidden_size - nb_units)])
        energies.append(E(v1, hidden_subset, nb_units))

    probs = T.nnet.softmax(T.stack(energies))
    # Reverse, accumulate, reverse back: a cumulative sum from the right.
    expected_icdf = T.cumsum(probs[:, ::-1], axis=1)[:, ::-1].eval()

    # Test inverse cdf
    icdf_z_given_v = theano.function([v], self.model.icdf_z_given_v(v))
    assert_array_almost_equal(icdf_z_given_v(v1), expected_icdf)

    batch_size = 500000
    self.model.batch_size = batch_size
    sample_zmask_given_v = theano.function([v], self.model.sample_zmask_given_v(v))
    v2 = np.tile(v1, (self.model.batch_size, 1))
    z_mask = sample_zmask_given_v(v2)

    # First hidden units should always be considered i.e. z_mask[:, 0] == 1
    assert_equal(np.sum(z_mask[:, 0] == 0, axis=0), 0)

    # Test that sampled masks are as expected i.e. equal expected_icdf.
    # float() guards against floor division on Python 2 should the mask
    # have an integer dtype.
    freq_per_z = np.sum(z_mask, axis=0) / float(self.model.batch_size)
    assert_array_almost_equal(freq_per_z, expected_icdf[0], decimal=3,
                              err_msg="Tested using MC sampling, rerun it to be certain that is an error or increase 'batch_size'.")
def test_compute_lnZ(self):
    """Check three ways of obtaining lnZ against a brute-force reference.

    The reference combines, for each number of hidden units z, the
    log-sum-exp of ``-E(v, h, z)``. It is compared with estimates derived
    from ``free_energy``, ``marginalize_over_v`` and
    ``marginalize_over_v_z`` (the latter fed NaN-padded configurations).
    """
    v = T.matrix('v')
    h = T.matrix('h')
    z = T.iscalar('z')
    lnZ = theano.function([v, h, z], logsumexp(-self.model.E(v, h, z)))

    V = cartesian([(0, 1)] * self.input_size, dtype=config.floatX)
    H = cartesian([(0, 1)] * self.hidden_size, dtype=config.floatX)

    energies = []
    for nb_units in range(1, self.hidden_size + 1):
        hz = np.array(H[::2**(self.hidden_size - nb_units)])
        energies.append(lnZ(V, hz, nb_units))

    # From here on ``lnZ`` holds the reference value, not the function.
    lnZ = logsumexp(np.array(energies)).eval()

    lnZ_using_free_energy = theano.function([v], logsumexp(-self.model.free_energy(v)))
    assert_almost_equal(lnZ_using_free_energy(V), lnZ, decimal=6)

    h = T.matrix('h')
    z = T.iscalar('z')
    lnZ_using_marginalize_over_v = theano.function([h, z], logsumexp(self.model.marginalize_over_v(h, z)))

    energies = []
    for nb_units in range(1, self.hidden_size + 1):
        hz = np.array(H[::2**(self.hidden_size - nb_units)])
        energies.append(lnZ_using_marginalize_over_v(hz, nb_units))

    assert_almost_equal(logsumexp(np.array(energies)).eval(), lnZ, decimal=6)

    # Construct Hz, a subset of H, using NaN as padding.
    # np.nan is used rather than the np.NaN alias, which NumPy 2.0 removed.
    Hz = []
    for nb_units in range(1, self.hidden_size + 1):
        hz = np.array(H[::2**(self.hidden_size - nb_units)])
        hz[:, nb_units:] = np.nan
        Hz.extend(hz)

    Hz = np.array(Hz)
    assert_equal(len(Hz), np.sum(2**(np.arange(self.hidden_size) + 1)))
    assert_true(len(Hz) < self.hidden_size * 2**self.hidden_size)

    lnZ_using_marginalize_over_v_z = theano.function([h], logsumexp(-self.model.marginalize_over_v_z(h)))
    assert_almost_equal(lnZ_using_marginalize_over_v_z(Hz), lnZ, decimal=6)
def test_sample_z_given_v(self):
    """Check ``icdf_z_given_v`` and ``sample_zmask_given_v`` of the model.

    The expected inverse CDF over z is computed on a zero-padded copy of the
    model and truncated to the model's own hidden size. Sampled masks are
    then compared with it by Monte Carlo frequency.
    """
    # Seeded generator so that a failure can be reproduced exactly.
    rng = np.random.RandomState(42)
    v1 = rng.rand(1, self.input_size).astype(config.floatX)

    # We simulate having an infinite number of hidden units by adding a lot
    # of hidden units with parameters set to 0.
    nb_hidden_units_to_add = 10000
    model = iRBM(input_size=self.model.input_size,
                 hidden_size=self.model.hidden_size + nb_hidden_units_to_add,
                 beta=self.model.beta.get_value())

    model.W.set_value(np.r_[self.model.W.get_value(),
                            np.zeros((nb_hidden_units_to_add, model.input_size), dtype=theano.config.floatX)])
    model.b.set_value(np.r_[self.model.b.get_value(),
                            np.zeros((nb_hidden_units_to_add,), dtype=theano.config.floatX)])
    model.c.set_value(self.model.c.get_value())

    v = T.matrix('v')
    z = T.iscalar('z')
    F_vz = theano.function([v, z], model.F(v, z))

    energies = [F_vz(v1, nb_units) for nb_units in range(1, model.hidden_size + 1)]
    energies = np.array(energies).T

    # Normalise exp(-F(v, z)) over z, then accumulate from the right.
    neg_log_probs = energies - -logsumexp(-energies, axis=1).eval()
    probs = np.exp(-neg_log_probs)
    expected_icdf = np.cumsum(probs[:, ::-1], axis=1)[:, ::-1]
    expected_icdf = expected_icdf[:, :self.model.hidden_size]

    # Test inverse cdf
    icdf_z_given_v = theano.function([v], self.model.icdf_z_given_v(v))
    # decimal=5 needed for float32
    assert_array_almost_equal(icdf_z_given_v(v1), expected_icdf, decimal=5)

    batch_size = 500000
    self.model.batch_size = batch_size
    sample_zmask_given_v = theano.function([v], self.model.sample_zmask_given_v(v))
    v2 = np.tile(v1, (self.model.batch_size, 1))
    z_mask = sample_zmask_given_v(v2)

    # First hidden units should always be considered i.e. z_mask[:, 0] == 1
    assert_equal(np.sum(z_mask[:, 0] == 0, axis=0), 0)

    # Test that sampled masks are as expected i.e. equal expected_icdf.
    # float() guards against floor division on Python 2 should the mask
    # have an integer dtype.
    freq_per_z = np.sum(z_mask, axis=0) / float(self.model.batch_size)
    assert_array_almost_equal(freq_per_z, expected_icdf[0], decimal=3,
                              err_msg="Tested using MC sampling, rerun it to be certain that is an error or increase 'batch_size'.")
def free_energy(self, v):
    """Free energy of ``v``, i.e. marginalization over hidden units.

    Computed as minus the visible-bias term ``dot(v, c)`` minus the
    log-sum-exp over axis 1 of ``self.log_z_given_v(v)``.
    """
    visible_term = T.dot(v, self.c)
    log_terms = self.log_z_given_v(v)
    # Sum over z'
    return -visible_term - logsumexp(log_terms, axis=1)
def free_energy_zmask(self, v, zmask):
    """Free energy of ``v`` with the per-z log terms multiplied by ``zmask``.

    Same expression as the unmasked free energy, except that
    ``self.log_z_given_v(v)`` is multiplied elementwise by ``zmask`` before
    the log-sum-exp over axis 1.
    """
    # NOTE(review): the mask is applied multiplicatively to log-domain
    # values, so a zero mask entry would contribute exp(0) to the sum rather
    # than being excluded -- confirm this is the intended behaviour.
    visible_term = T.dot(v, self.c)
    masked_log_terms = self.log_z_given_v(v) * zmask
    # Sum over z'
    return -visible_term - logsumexp(masked_log_terms, axis=1)
def pdf_z_given_v(self, v):
    """Return ``exp(log_z_given_v(v))`` normalised along axis 1.

    The normaliser is a log-sum-exp over axis 1, subtracted in the log
    domain before exponentiating.
    """
    unnormalised_log_probs = self.log_z_given_v(v)
    log_normaliser = logsumexp(unnormalised_log_probs, axis=1)
    # Add a trailing axis so the normaliser lines up with each row.
    return T.exp(unnormalised_log_probs - log_normaliser[:, None])