def test_score_samples():
    """Test score_samples (pseudo-likelihood) method."""
    # Assert that pseudo-likelihood is computed without clipping.
    # See Fabian's blog, http://bit.ly/1iYefRk
    rng = np.random.RandomState(42)
    X = np.vstack([np.zeros(1000), np.ones(1000)])
    rbm1 = BernoulliRBM(n_components=10, batch_size=2, n_iter=10,
                        random_state=rng)
    rbm1.fit(X)
    assert_true((rbm1.score_samples(X) < -300).all())

    # Sparse vs. dense should not affect the output. Also test sparse input
    # validation.
    rbm1.random_state = 42
    d_score = rbm1.score_samples(X)
    rbm1.random_state = 42
    s_score = rbm1.score_samples(lil_matrix(X))
    assert_almost_equal(d_score, s_score)

    # Test numerical stability (#2785): would previously generate infinities
    # and crash with an exception.
    with np.errstate(under='ignore'):
        rbm1.score_samples(np.arange(1000) * 100)
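The test functions in this collection come from scikit-learn's BernoulliRBM test suite (sklearn/neural_network/tests/test_rbm.py) and assume a shared module-level fixture. A rough sketch of that fixture is below; the exact import paths (for example sklearn.utils.testing vs. sklearn.utils._testing, and the availability of assert_true) depend on the scikit-learn version.

import numpy as np
from scipy.sparse import csc_matrix, csr_matrix, lil_matrix
from sklearn import datasets
from sklearn.neural_network import BernoulliRBM
from sklearn.utils.testing import (assert_almost_equal, assert_array_equal,
                                   assert_true)

# 8x8 digit images, scaled to [0, 1] so they can be treated as Bernoulli units
Xdigits = datasets.load_digits().data
Xdigits -= Xdigits.min()
Xdigits /= Xdigits.max()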
def test_small_sparse_partial_fit():
    for sparse in [csc_matrix, csr_matrix]:
        X_sparse = sparse(Xdigits[:100])
        X = Xdigits[:100].copy()

        rbm1 = BernoulliRBM(n_components=64, learning_rate=0.1,
                            batch_size=10, random_state=9)
        rbm2 = BernoulliRBM(n_components=64, learning_rate=0.1,
                            batch_size=10, random_state=9)

        rbm1.partial_fit(X_sparse)
        rbm2.partial_fit(X)

        assert_almost_equal(rbm1.score_samples(X).mean(),
                            rbm2.score_samples(X).mean(),
                            decimal=0)
def run_test(params, model):
    if model == "rf":
        n_tree, mtry = params
        print("# Trees: ", n_tree)
        print("mtry: ", mtry)
        rf = RandomForestClassifier(n_estimators=int(n_tree), verbose=True,
                                    n_jobs=-1, max_features=int(mtry))
        rf.fit(X, y)
        modelPred = rf.predict(X)
    elif model == "svm":
        C, kernel = params
        print("# Cost: ", C)
        print("kernel: ", kernel)
        svmod = SVC(int(C), kernel)
        svmod.fit(X, y)
        modelPred = svmod.predict(X)
    elif model == "knn":
        k = params
        print("# k: ", k)
        knnmod = KNeighborsClassifier(int(k))
        knnmod.fit(X, y)
        modelPred = knnmod.predict(X)
    elif model == "NeuralNetwork":
        n_components, learning_rate, batch_size, n_iter = params
        print("# n_components: ", n_components)
        print("# learning_rate: ", learning_rate)
        print("# batch_size: ", batch_size)
        print("# n_iter: ", n_iter)
        nnmod = BernoulliRBM(int(n_components), learning_rate,
                             int(batch_size), int(n_iter))
        nnmod.fit(X, y)
        modelPred = nnmod.score_samples(X)
    accuError = AccuracyErrorCalc(y, modelPred)
    return accuError
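The function dispatches on the model string and expects params to match that model; X, y and AccuracyErrorCalc are globals from the original script and are not shown here. A hypothetical call might look like this (the parameter values are illustrative only):

err_rf = run_test((100, 4), "rf")                 # 100 trees, mtry=4
err_svm = run_test((1, "rbf"), "svm")             # C=1, RBF kernel
err_knn = run_test(5, "knn")                      # k=5
err_rbm = run_test((64, 0.05, 10, 20), "NeuralNetwork")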
def testRBM():
    X = np.array([[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]])
    print(X)
    model = BernoulliRBM(n_components=2)
    model.fit(X)
    print(dir(model))
    print(model.transform(X))
    print(model.score_samples(X))
    print(model.gibbs)
def test_fit():
    X = Xdigits.copy()

    rbm = BernoulliRBM(n_components=64, learning_rate=0.1,
                       batch_size=10, n_iter=7, random_state=9)
    rbm.fit(X)

    assert_almost_equal(rbm.score_samples(X).mean(), -21.0, decimal=0)

    # in-place tricks shouldn't have modified X
    assert_array_equal(X, Xdigits)
def test_score_samples():
    """Check that the pseudo likelihood is computed without clipping.

    http://fa.bianp.net/blog/2013/numerical-optimizers-for-logistic-regression/
    """
    rng = np.random.RandomState(42)
    X = np.vstack([np.zeros(1000), np.ones(1000)])
    rbm1 = BernoulliRBM(n_components=10, batch_size=2, n_iter=10,
                        random_state=rng)
    rbm1.fit(X)
    assert (rbm1.score_samples(X) < -300).all()
def do_scratch():
    from sklearn.neural_network import BernoulliRBM
    rbm = BernoulliRBM(n_components=64, verbose=True, n_iter=500,
                       batch_size=len(Y_TRAIN), learning_rate=0.01)
    rbm.fit(Y_TRAIN)
    print(rbm.score_samples(Y_TRAIN))
def test_partial_fit():
    X = Xdigits.copy()
    rbm = BernoulliRBM(n_components=64, learning_rate=0.1,
                       batch_size=20, random_state=9)
    n_samples = X.shape[0]
    n_batches = int(np.ceil(float(n_samples) / rbm.batch_size))
    batch_slices = np.array_split(X, n_batches)

    for i in range(7):
        for batch in batch_slices:
            rbm.partial_fit(batch)

    assert_almost_equal(rbm.score_samples(X).mean(), -21.0, decimal=0)
    assert_array_equal(X, Xdigits)
class RBMModel(JointModel):
    def __init__(self, hyper_params, random=True, name=None):
        super(RBMModel, self).__init__(hyper_params, random, name)
        self.model = BernoulliRBM()

    def set_params(self, params):
        self.model = BernoulliRBM(**params)

    def evaluate(self, X):
        return self.model.score_samples(X).mean()

    def generate_samples(self, start, step=1):
        for i in range(step):
            start = self.model.gibbs(start)
        return start
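A minimal usage sketch, assuming the JointModel base class (not shown here) accepts an empty hyper-parameter dict; the data and parameter values are illustrative only.

import numpy as np

X_binary = (np.random.RandomState(0).rand(100, 16) > 0.5).astype(np.float64)

model = RBMModel(hyper_params={})          # hyper_params format depends on JointModel
model.set_params({'n_components': 8, 'learning_rate': 0.05,
                  'batch_size': 10, 'n_iter': 20})
model.model.fit(X_binary)                  # fit the wrapped BernoulliRBM
print(model.evaluate(X_binary))            # mean pseudo-log-likelihood
v = model.generate_samples(X_binary[:1], step=100)  # 100 Gibbs steps from a data row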
def estimate_n_components():
    X = load_data('gender/male')
    X = X.astype(np.float32) / 256
    n_comp_list = [100, 200, 300, 400, 500, 600,
                   700, 800, 900, 1000, 1100, 1200]
    scores = []
    for n_comps in n_comp_list:
        rbm = BernoulliRBM(random_state=0, verbose=True)
        rbm.learning_rate = 0.06
        rbm.n_iter = 50
        # use the grid value; hard-coding 100 here would fit the same model
        # on every iteration
        rbm.n_components = n_comps
        rbm.fit(X)
        score = rbm.score_samples(X).mean()
        scores.append(score)

    plt.figure()
    plt.plot(n_comp_list, scores)
    plt.show()

    return n_comp_list, scores
def CalculateObjectFunction(chrom):
    W_Comb_1 = int(chrom[0])
    W_Comb_2 = int(chrom[1])
    W_Comb_3 = int(chrom[2])
    Decoded_X4_W = Decode_X4(chrom[3:])

    kf = RepeatedKFold(n_splits=5, n_repeats=2)
    AC = []
    for train, test in kf.split(X):
        model = BernoulliRBM(n_components=W_Comb_1, learning_rate=Decoded_X4_W,
                             batch_size=W_Comb_2, n_iter=W_Comb_3,
                             verbose=1, random_state=0)
        model.fit(X[train])
        AC.append(model.score_samples(X[test]).mean())
    return statistics.mean(AC)
import numpy as np
from sklearn.neural_network import BernoulliRBM
# train_test_split lives in sklearn.model_selection
# (the old sklearn.cross_validation module has been removed)
from sklearn.model_selection import train_test_split

# Read from data file
with open("dataset") as textFile:
    lines = [line.split() for line in textFile]
a = np.array(lines, dtype=float)
dataPoints = np.array(a[:, [1, 2, 3]])
target = np.array(a[:, 0])

model = BernoulliRBM()
last_score = 0
last_partition = 0
for i in range(2, 10):
    x_train, x_test, y_train, y_test = train_test_split(
        dataPoints, target, test_size=float(i) / 10.0, random_state=0)
    # BernoulliRBM is unsupervised: fit ignores labels and
    # score_samples takes only the feature matrix
    model.fit(x_train)
    score = model.score_samples(x_test).mean()
    if score > last_score:
        last_score = score
        # record the test fraction that gave the best score
        last_partition = float(i) / 10.0

x_train, x_test, y_train, y_test = train_test_split(
    dataPoints, target, test_size=last_partition, random_state=0)
model.fit(x_train)
print(model.score_samples(x_test).mean())
print(last_score)
class AbstractRBM(Model): """Restricted Boltzmann Machine RBM code adapted from http://deeplearning.net/tutorial/rbm.html Epoch 15 of 15 took 635.792s (2448 minibatches) training loss/acc: -62.311016 -62.311016 Training Params --------------- batch_size: 20 learning_rate: 0.1 """ def __init__(self, n_dim, n_out, n_chan=1, n_superbatch=12800, opt_alg='adam', opt_params={ 'lr': 1e-3, 'b1': 0.9, 'b2': 0.99 }): """RBM constructor. Defines the parameters of the model along with basic operations for inferring hidden from visible (and vice-versa), as well as for performing CD updates. """ # store sklearn RBM instance self.rbm = BernoulliRBM(random_state=0, n_components=self.n_hidden) self.n_chain = 100 # initialize storage for the persistent chain (state = hidden # layer of chain) self.persistent_chain = theano.shared( np.zeros((self.n_batch, self.n_hidden), dtype=theano.config.floatX), borrow=True, ) self.bit_i_idx = theano.shared(value=0, name='bit_i_idx') # create shared data variables self.train_set_x = theano.shared( np.empty((n_superbatch, n_chan, n_dim, n_dim), dtype=theano.config.floatX), borrow=False, ) self.val_set_x = theano.shared( np.empty((n_superbatch, n_chan, n_dim, n_dim), dtype=theano.config.floatX), borrow=False, ) # create y-variables self.train_set_y = theano.shared(np.empty((n_superbatch, ), dtype='int32'), borrow=False) self.val_set_y = theano.shared(np.empty((n_superbatch, ), dtype='int32'), borrow=False) # train_set_y_int = T.cast(train_set_y, 'int32') # val_set_y_int = T.cast(val_set_y, 'int32') def free_energy(self, v_sample): """Function to compute the free energy""" wx_b = T.dot(v_sample, self.W) + self.hbias vbias_term = T.dot(v_sample, self.vbias) hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1) return -hidden_term - vbias_term def propup(self, vis): """This function propagates the visible units activation upwards to the hidden units Note that we return also the pre-sigmoid activation of the layer. As it will turn out later, due to how Theano deals with optimizations, this symbolic variable will be needed to write down a more stable computational graph (see details in the reconstruction cost function) """ pre_sigmoid_activation = T.dot(vis, self.W) + self.hbias return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)] def sample_h_given_v(self, v0_sample): """This function infers state of hidden units given visible units""" # compute the activation of the hidden units given a sample of # the visibles pre_sigmoid_h1, h1_mean = self.propup(v0_sample) # get a sample of the hiddens given their activation # Note that theano_rng.binomial returns a symbolic sample of dtype # int64 by default. If we want to keep our computations in floatX # for the GPU we need to specify to return the dtype floatX h1_sample = self.theano_rng.binomial( size=h1_mean.shape, n=1, p=h1_mean, dtype=theano.config.floatX, ) return [pre_sigmoid_h1, h1_mean, h1_sample] def propdown(self, hid): """This function propagates the hidden units activation downwards to the visible units Note that we return also the pre_sigmoid_activation of the layer. 
As it will turn out later, due to how Theano deals with optimizations, this symbolic variable will be needed to write down a more stable computational graph (see details in the reconstruction cost function) """ pre_sigmoid_activation = T.dot(hid, self.W.T) + self.vbias return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)] def sample_v_given_h(self, h0_sample): """This function infers state of visible units given hidden units""" # compute the activation of the visible given the hidden sample pre_sigmoid_v1, v1_mean = self.propdown(h0_sample) # get a sample of the visible given their activation # Note that theano_rng.binomial returns a symbolic sample of dtype # int64 by default. If we want to keep our computations in floatX # for the GPU we need to specify to return the dtype floatX v1_sample = self.theano_rng.binomial( size=v1_mean.shape, n=1, p=v1_mean, dtype=theano.config.floatX, ) return [pre_sigmoid_v1, v1_mean, v1_sample] def gibbs_hvh(self, h0_sample): """This function implements one step of Gibbs sampling, starting from the hidden state """ pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h0_sample) pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v1_sample) return [ pre_sigmoid_v1, v1_mean, v1_sample, pre_sigmoid_h1, h1_mean, h1_sample ] def gibbs_vhv(self, v0_sample): """This function implements one step of Gibbs sampling, starting from the visible state """ pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v0_sample) pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h1_sample) return [ pre_sigmoid_h1, h1_mean, h1_sample, pre_sigmoid_v1, v1_mean, v1_sample ] def pseudolikelihood(self, data): self.rbm.components_ = self.W.get_value().T self.rbm.intercept_visible_ = self.vbias.get_value() self.rbm.intercept_hidden_ = self.hbias.get_value() return self.rbm.score_samples(data).mean() def get_pseudo_likelihood_cost(self, X, updates): """Stochastic approximation to the pseudo-likelihood""" # index of bit i in expression p(x_i | x_{\i}) bit_i_idx = self.bit_i_idx # bit_i_idx = theano.shared(value=0, name='bit_i_idx') # binarize the input image by rounding to nearest integer xi = T.round(X) # calculate free energy for the given bit configuration fe_xi = self.free_energy(xi) # flip bit x_i of matrix xi and preserve all other bits x_{\i} # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx], but assigns # the result to xi_flip, instead of working in place on xi. xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx]) # calculate free energy with bit flipped fe_xi_flip = self.free_energy(xi_flip) # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i}))) cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi))) return cost def hallucinate(self): """Once the RBM is trained, we can then use the gibbs_vhv function to implement the Gibbs chain required for sampling. This overwrites the hallucinate function in Model completely. 
""" n_samples = 10 hallu_set = self.hallu_set.reshape((-1, self.n_visible)) persistent_vis_chain = theano.shared(hallu_set) # define one step of Gibbs sampling (mf = mean-field) define a # function that does `1000` steps before returning the # sample for plotting ([presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates) = theano.scan( self.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], n_steps=1000, name="gibbs_vhv", ) # add to updates that takes care of our persistent chain : updates.update({persistent_vis_chain: vis_samples[-1]}) # construct the function that implements our persistent chain. # we generate the "mean field" activations for plotting and the actual # samples for reinitializing the state of our persistent chain sample_fn = theano.function( [], [vis_mfs[-1], vis_samples[-1]], updates=updates, name='sample_fn', ) for idx in range(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() img_size = int(np.sqrt(self.n_chain)) vis_mf = vis_mf.reshape((img_size, img_size, self.n_dim, self.n_dim)) vis_mf = np.concatenate(np.split(vis_mf, img_size, axis=0), axis=3) # split into img_size (1,1,n_dim,n_dim*img_size) images, # concat along rows -> 1,1,n_dim*img_size,n_dim*img_size vis_mf = np.concatenate(np.split(vis_mf, img_size, axis=1), axis=2) return np.squeeze(vis_mf) def hallucinate_chain(self): """Once the RBM is trained, we can then use the gibbs_vhv function to implement the Gibbs chain required for sampling. This overwrites the hallucinate function in Model completely. """ n_samples = 10 # hallu_set = self.hallu_set.reshape((-1, self.n_visible)) hallu_set = np.random.rand(1, 64).astype('float32') # hallu_set = self.Phi.get_value()[:,0] persistent_vis_chain = theano.shared(hallu_set) # define one step of Gibbs sampling (mf = mean-field) define a # function that does `1000` steps before returning the # sample for plotting ([presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates) = theano.scan( self.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], n_steps=100, name="gibbs_vhv", ) # add to updates that takes care of our persistent chain : updates.update({persistent_vis_chain: vis_samples[-1]}) # construct the function that implements our persistent chain. # we generate the "mean field" activations for plotting and the actual # samples for reinitializing the state of our persistent chain sample_fn = theano.function([], vis_mfs, updates=updates, name='sample_fn') vis_mf = sample_fn() print vis_mf.shape # vis_mf = vis_mf[::10] print vis_mf.shape img_size = 10 vis_mf = vis_mf.reshape((img_size, img_size, self.n_dim, self.n_dim)) vis_mf = np.transpose(vis_mf, [1, 0, 2, 3]) vis_mf = np.concatenate(np.split(vis_mf, img_size, axis=0), axis=3) # split into img_size (1,1,n_dim,n_dim*img_size) images, # concat along rows -> 1,1,n_dim*img_size,n_dim*img_size vis_mf = np.concatenate(np.split(vis_mf, img_size, axis=1), axis=2) return np.squeeze(vis_mf) def E_np_h(self, h): bv_vec = self.vbias.get_value().reshape((self.n_visible, 1)) bh_vec = self.hbias.get_value().reshape((self.n_hidden, 1)) W = self.W.get_value() return (np.dot(bh_vec.T, h) + np.sum( np.log(1. 
+ np.exp(bv_vec + np.dot(W, h))), axis=0)).flatten() def E_np_v(self, v): bv_vec = self.vbias.get_value().reshape((self.n_visible, 1)) bh_vec = self.hbias.get_value().reshape((self.n_hidden, 1)) W = self.W.get_value() return (np.dot(bv_vec.T, v) + np.sum( np.log(1. + np.exp(bh_vec + np.dot(W.T, v))), axis=0)).flatten() def logZ_exact(self, marg='v'): # get the next binary vector t = self.n_hidden if marg == 'v' else self.n_visible def inc(x): for i in xrange(t): x[i, 0] += 1 if x[i, 0] <= 1: return True x[i, 0] = 0 return False #compute the normalizing constant if marg == 'v': x = np.zeros((self.n_hidden, 1)) elif marg == 'h': x = np.zeros((self.n_visible, 1)) logZ = -np.inf while True: if marg == 'v': logF = self.E_np_h(x) elif marg == 'h': logF = self.E_np_v(x) # print ''.join([str(xi) for xi in x]), logF, logZ logZ = np.logaddexp(logZ, logF) if not inc(x): break # print return logZ def sample(self): """Once the RBM is trained, we can then use the gibbs_vhv function to implement the Gibbs chain required for sampling. This overwrites the hallucinate function in Model completely. """ n_samples = 10 hallu_set = self.hallu_set.reshape((-1, self.n_visible)) persistent_vis_chain = theano.shared(hallu_set) # define one step of Gibbs sampling (mf = mean-field) define a # function that does `1000` steps before returning the # sample for plotting ([presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates) = theano.scan( self.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], n_steps=1000, name="gibbs_vhv", ) # add to updates that takes care of our persistent chain : updates.update({persistent_vis_chain: vis_samples[-1]}) # construct the function that implements our persistent chain. # we generate the "mean field" activations for plotting and the actual # samples for reinitializing the state of our persistent chain sample_fn = theano.function( [], [vis_mfs[-1], vis_samples[-1]], updates=updates, name='sample_fn', ) for idx in range(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() img_size = int(np.sqrt(self.n_chain)) vis_mf = vis_mf.reshape((self.n_chain, self.n_dim * self.n_dim)) return vis_mf
class RBM(Model): """Restricted Boltzmann Machine RBM code adapted from http://deeplearning.net/tutorial/rbm.html Epoch 15 of 15 took 635.792s (2448 minibatches) training loss/acc: -62.311016 -62.311016 Training Params --------------- batch_size: 20 learning_rate: 0.1 """ def __init__(self, n_dim, n_out, n_chan=1, n_superbatch=12800, opt_alg='adam', opt_params={ 'lr': 1e-3, 'b1': 0.9, 'b2': 0.99 }): """RBM constructor. Defines the parameters of the model along with basic operations for inferring hidden from visible (and vice-versa), as well as for performing CD updates. """ self.numpy_rng = np.random.RandomState(1234) self.theano_rng = RandomStreams(self.numpy_rng.randint(2**30)) # save config n_batch = opt_params.get('nb') self.n_hidden = 8 self.n_batch = n_batch self.n_chain = 100 # store sklearn RBM instance self.rbm = BernoulliRBM(random_state=0, n_components=self.n_hidden) self.n_dim = n_dim self.n_out = n_out self.n_superbatch = n_superbatch self.alg = opt_alg # invoke parent constructor Model.__init__(self, n_dim, n_chan, n_out, n_superbatch, opt_alg, opt_params) def create_updates(self, grads, params, alpha, opt_alg, opt_params): # scaled_grads = [grad * alpha for grad in grads] scaled_grads = grads lr = opt_params.get('lr', 1e-3) if opt_alg == 'sgd': grad_updates = lasagne.updates.sgd(scaled_grads, params, learning_rate=lr) elif opt_alg == 'adam': b1, b2 = opt_params.get('b1', 0.9), opt_params.get('b2', 0.999) grad_updates = lasagne.updates.adam(scaled_grads, params, learning_rate=lr, beta1=b1, beta2=b2) else: grad_updates = OrderedDict() grad_updates[self.persistent_chain] = self.nh_sample # increment bit_i_idx % number as part of updates grad_updates[self.bit_i_idx] = (self.bit_i_idx + 1) % self.n_visible all_updates = dict(self.chain_updates.items() + grad_updates.items()) return all_updates def create_objectives(self, deterministic=False): """Stochastic approximation to the pseudo-likelihood""" X = self.inputs[0] X = X.reshape((-1, self.n_visible)) # index of bit i in expression p(x_i | x_{\i}) bit_i_idx = self.bit_i_idx # bit_i_idx = theano.shared(value=0, name='bit_i_idx') # binarize the input image by rounding to nearest integer xi = T.round(X) # calculate free energy for the given bit configuration fe_xi = self.free_energy(xi) # flip bit x_i of matrix xi and preserve all other bits x_{\i} # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx], but assigns # the result to xi_flip, instead of working in place on xi. 
xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx]) # calculate free energy with bit flipped fe_xi_flip = self.free_energy(xi_flip) # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i}))) cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi))) return cost, cost def create_inputs(self): # allocate symbolic variables for the data X = T.tensor4(dtype=theano.config.floatX) S = T.tensor3(dtype=theano.config.floatX) Y = T.ivector() idx1, idx2 = T.lscalar(), T.lscalar() alpha = T.scalar(dtype=theano.config.floatX) # learning rate return X, Y, idx1, idx2, S def create_model(self, X, Y, n_dim, n_out, n_chan=1): n_visible = n_chan * n_dim * n_dim # size of visible layer n_hidden = self.n_hidden # size of hidden layer k_steps = 5 # number of steps during CD/PCD # W is initialized with `initial_W` which is uniformely # sampled from -4*sqrt(6./(n_visible+n_hidden)) and # 4*sqrt(6./(n_hidden+n_visible)) the output of uniform if # converted using asarray to dtype theano.config.floatX so # that the code is runable on GPU initial_W = np.asarray( self.numpy_rng.uniform( low=-4 * np.sqrt(6. / (n_hidden + n_visible)), high=4 * np.sqrt(6. / (n_hidden + n_visible)), size=(n_visible, n_hidden)), dtype=theano.config.floatX, ) # theano shared variables for weights and biases W = theano.shared(value=initial_W, name='W', borrow=True) # create shared variable for hidden units bias hbias = theano.shared( value=np.zeros(n_hidden, dtype=theano.config.floatX), name='hbias', borrow=True, ) # create shared variable for visible units bias vbias = theano.shared( value=np.zeros(n_visible, dtype=theano.config.floatX), name='vbias', borrow=True, ) # initialize storage for the persistent chain (state = hidden # layer of chain) print theano.config.floatX print(self.n_batch, self.n_hidden) self.persistent_chain = theano.shared( np.zeros((self.n_batch, self.n_hidden), dtype=theano.config.floatX), borrow=True, ) self.bit_i_idx = theano.shared(value=0, name='bit_i_idx') # the data is presented as rasterized images self.W = W self.hbias = hbias self.vbias = vbias # **** WARNING: It is not a good idea to put things in this list # other than shared variables created in this function. self.params = [self.W, self.hbias, self.vbias] # network params self.n_visible = n_visible self.n_hidden = n_hidden self.k_steps = k_steps return None def free_energy(self, v_sample): """Function to compute the free energy""" wx_b = T.dot(v_sample, self.W) + self.hbias vbias_term = T.dot(v_sample, self.vbias) hidden_term = T.sum(T.log(1 + T.exp(wx_b)), axis=1) return -hidden_term - vbias_term def propup(self, vis): """This function propagates the visible units activation upwards to the hidden units Note that we return also the pre-sigmoid activation of the layer. As it will turn out later, due to how Theano deals with optimizations, this symbolic variable will be needed to write down a more stable computational graph (see details in the reconstruction cost function) """ pre_sigmoid_activation = T.dot(vis, self.W) + self.hbias return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)] def sample_h_given_v(self, v0_sample): """This function infers state of hidden units given visible units""" # compute the activation of the hidden units given a sample of # the visibles pre_sigmoid_h1, h1_mean = self.propup(v0_sample) # get a sample of the hiddens given their activation # Note that theano_rng.binomial returns a symbolic sample of dtype # int64 by default. 
If we want to keep our computations in floatX # for the GPU we need to specify to return the dtype floatX h1_sample = self.theano_rng.binomial( size=h1_mean.shape, n=1, p=h1_mean, dtype=theano.config.floatX, ) return [pre_sigmoid_h1, h1_mean, h1_sample] def propdown(self, hid): """This function propagates the hidden units activation downwards to the visible units Note that we return also the pre_sigmoid_activation of the layer. As it will turn out later, due to how Theano deals with optimizations, this symbolic variable will be needed to write down a more stable computational graph (see details in the reconstruction cost function) """ pre_sigmoid_activation = T.dot(hid, self.W.T) + self.vbias return [pre_sigmoid_activation, T.nnet.sigmoid(pre_sigmoid_activation)] def sample_v_given_h(self, h0_sample): """This function infers state of visible units given hidden units""" # compute the activation of the visible given the hidden sample pre_sigmoid_v1, v1_mean = self.propdown(h0_sample) # get a sample of the visible given their activation # Note that theano_rng.binomial returns a symbolic sample of dtype # int64 by default. If we want to keep our computations in floatX # for the GPU we need to specify to return the dtype floatX v1_sample = self.theano_rng.binomial( size=v1_mean.shape, n=1, p=v1_mean, dtype=theano.config.floatX, ) return [pre_sigmoid_v1, v1_mean, v1_sample] def gibbs_hvh(self, h0_sample): """This function implements one step of Gibbs sampling, starting from the hidden state """ pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h0_sample) pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v1_sample) return [ pre_sigmoid_v1, v1_mean, v1_sample, pre_sigmoid_h1, h1_mean, h1_sample ] def gibbs_vhv(self, v0_sample): """This function implements one step of Gibbs sampling, starting from the visible state """ pre_sigmoid_h1, h1_mean, h1_sample = self.sample_h_given_v(v0_sample) pre_sigmoid_v1, v1_mean, v1_sample = self.sample_v_given_h(h1_sample) return [ pre_sigmoid_h1, h1_mean, h1_sample, pre_sigmoid_v1, v1_mean, v1_sample ] def get_params(self): return self.params def create_gradients(self, cost, deterministic=False): """Returns the updates dictionary. The dictionary contains the update rules for weights and biases but also an update of the shared variable used to store the persistent chain, if one is used. :param lr: learning rate used to train the RBM :param persistent: None for CD. For PCD, shared variable containing old state of Gibbs chain. This must be a shared variable of size (batch size, number of hidden units). Returns a proxy for the cost and the updates dictionary. The dictionary contains the update rules for weights and biases but also an update of the shared variable used to store the persistent chain, if one is used. """ if deterministic == True: return None X = self.inputs[0] x = X.reshape((-1, self.n_visible)) # compute positive phase pre_sigmoid_ph, ph_mean, ph_sample = self.sample_h_given_v(x) # for PCD, we initialize from the old state of the chain chain_start = self.persistent_chain # perform actual negative phase # in order to implementPCD-k we need to scan over the # function that implements one gibbs step k times. 
# Read Theano tutorial on scan for more information : # http://deeplearning.net/software/theano/library/scan.html # the scan will return the entire Gibbs chain ([ pre_sigmoid_nvs, nv_means, nv_samples, pre_sigmoid_nhs, nh_means, nh_samples ], updates) = theano.scan( self.gibbs_hvh, # the None are place holders, saying that # chain_start is the initial state corresponding to the # 6th output outputs_info=[None, None, None, None, None, chain_start], n_steps=15, name="gibbs_hvh") # determine gradients on RBM parameters # note that we only need the sample at the end of the chain chain_end = nv_samples[-1] cost = T.mean(self.free_energy(x)) - T.mean( self.free_energy(chain_end)) # We must not compute the gradient through the gibbs sampling params = self.get_params() gparams = T.grad(cost, self.params, consider_constant=[chain_end]) self.nh_sample = nh_samples[-1] self.chain_updates = updates return gparams def pseudolikelihood(self, data): self.rbm.components_ = self.W.get_value().T self.rbm.intercept_visible_ = self.vbias.get_value() self.rbm.intercept_hidden_ = self.hbias.get_value() return self.rbm.score_samples(data).mean() def get_pseudo_likelihood_cost(self, X, updates): """Stochastic approximation to the pseudo-likelihood""" # index of bit i in expression p(x_i | x_{\i}) bit_i_idx = self.bit_i_idx # bit_i_idx = theano.shared(value=0, name='bit_i_idx') # binarize the input image by rounding to nearest integer xi = T.round(X) # calculate free energy for the given bit configuration fe_xi = self.free_energy(xi) # flip bit x_i of matrix xi and preserve all other bits x_{\i} # Equivalent to xi[:,bit_i_idx] = 1-xi[:, bit_i_idx], but assigns # the result to xi_flip, instead of working in place on xi. xi_flip = T.set_subtensor(xi[:, bit_i_idx], 1 - xi[:, bit_i_idx]) # calculate free energy with bit flipped fe_xi_flip = self.free_energy(xi_flip) # equivalent to e^(-FE(x_i)) / (e^(-FE(x_i)) + e^(-FE(x_{\i}))) cost = T.mean(self.n_visible * T.log(T.nnet.sigmoid(fe_xi_flip - fe_xi))) return cost def fit(self, X_train, Y_train, X_val, Y_val, n_epoch=10, n_batch=100, logname='run'): """Train the model""" alpha = 1.0 # learning rate, which can be adjusted later n_data = len(X_train) n_superbatch = self.n_superbatch for epoch in range(n_epoch): # In each epoch, we do a full pass over the training data: train_batches, train_err, train_acc = 0, 0, 0 start_time = time.time() # iterate over superbatches to save time on GPU memory transfer for X_sb, Y_sb in self.iterate_superbatches( X_train, Y_train, n_superbatch, datatype='train', shuffle=True, ): for idx1, idx2 in iterate_minibatch_idx(len(X_sb), n_batch): err, acc = self.train(idx1, idx2, alpha) # collect metrics err = self.pseudolikelihood(X_sb[idx1:idx2].reshape( -1, 784)) train_batches += 1 train_err += err train_acc += acc if train_batches % 100 == 0: n_total = epoch * n_data + n_batch * train_batches metrics = [ n_total, train_err / train_batches, train_acc / train_batches, ] log_metrics(logname, metrics) print "Epoch {} of {} took {:.3f}s ({} minibatches)".format( epoch + 1, n_epoch, time.time() - start_time, train_batches) print " training:\t\t{:.6f}\t{:.6f}".format( train_err / train_batches, train_acc / train_batches) # reserve N of training data points to kick start hallucinations hallu_i = self.numpy_rng.randint(n_data - self.n_chain) self.hallu_set = np.asarray(X_train[hallu_i:hallu_i + self.n_chain], dtype=theano.config.floatX) def hallucinate(self): """Once the RBM is trained, we can then use the gibbs_vhv function to implement the Gibbs 
chain required for sampling. This overwrites the hallucinate function in Model completely. """ n_samples = 10 hallu_set = self.hallu_set.reshape((-1, self.n_visible)) persistent_vis_chain = theano.shared(hallu_set) # define one step of Gibbs sampling (mf = mean-field) define a # function that does `1000` steps before returning the # sample for plotting ([presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates) = theano.scan( self.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], n_steps=1000, name="gibbs_vhv", ) # add to updates that takes care of our persistent chain : updates.update({persistent_vis_chain: vis_samples[-1]}) # construct the function that implements our persistent chain. # we generate the "mean field" activations for plotting and the actual # samples for reinitializing the state of our persistent chain sample_fn = theano.function( [], [vis_mfs[-1], vis_samples[-1]], updates=updates, name='sample_fn', ) for idx in range(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() img_size = int(np.sqrt(self.n_chain)) vis_mf = vis_mf.reshape((img_size, img_size, self.n_dim, self.n_dim)) vis_mf = np.concatenate(np.split(vis_mf, img_size, axis=0), axis=3) # split into img_size (1,1,n_dim,n_dim*img_size) images, # concat along rows -> 1,1,n_dim*img_size,n_dim*img_size vis_mf = np.concatenate(np.split(vis_mf, img_size, axis=1), axis=2) return np.squeeze(vis_mf) def sample(self): """Once the RBM is trained, we can then use the gibbs_vhv function to implement the Gibbs chain required for sampling. This overwrites the hallucinate function in Model completely. """ n_samples = 10 hallu_set = self.hallu_set.reshape((-1, self.n_visible)) persistent_vis_chain = theano.shared(hallu_set) # define one step of Gibbs sampling (mf = mean-field) define a # function that does `1000` steps before returning the # sample for plotting ([presig_hids, hid_mfs, hid_samples, presig_vis, vis_mfs, vis_samples], updates) = theano.scan( self.gibbs_vhv, outputs_info=[None, None, None, None, None, persistent_vis_chain], n_steps=1000, name="gibbs_vhv", ) # add to updates that takes care of our persistent chain : updates.update({persistent_vis_chain: vis_samples[-1]}) # construct the function that implements our persistent chain. # we generate the "mean field" activations for plotting and the actual # samples for reinitializing the state of our persistent chain sample_fn = theano.function( [], [vis_mfs[-1], vis_samples[-1]], updates=updates, name='sample_fn', ) for idx in range(n_samples): # generate `plot_every` intermediate samples that we discard, # because successive samples in the chain are too correlated vis_mf, vis_sample = sample_fn() img_size = int(np.sqrt(self.n_chain)) vis_mf = vis_mf.reshape((self.n_chain, self.n_dim * self.n_dim)) return vis_mf def load_params(self, params): """Load a given set of parameters""" self.params = params def dump_params(self): """Dump a given set of parameters""" return self.params def E_np_h(self, h): bv_vec = self.vbias.get_value().reshape((self.n_visible, 1)) bh_vec = self.hbias.get_value().reshape((self.n_hidden, 1)) W = self.W.get_value() return (np.dot(bh_vec.T, h) + np.sum( np.log(1. 
+ np.exp(bv_vec + np.dot(W, h))), axis=0)).flatten() def E_np_v(self, v): bv_vec = self.vbias.get_value().reshape((self.n_visible, 1)) bh_vec = self.hbias.get_value().reshape((self.n_hidden, 1)) W = self.W.get_value() return (np.dot(bv_vec.T, v) + np.sum( np.log(1. + np.exp(bh_vec + np.dot(W.T, v))), axis=0)).flatten() def logZ_exact(self, marg='v'): # get the next binary vector t = self.n_hidden if marg == 'v' else self.n_visible def inc(x): for i in xrange(t): x[i, 0] += 1 if x[i, 0] <= 1: return True x[i, 0] = 0 return False #compute the normalizing constant if marg == 'v': x = np.zeros((self.n_hidden, 1)) elif marg == 'h': x = np.zeros((self.n_visible, 1)) logZ = -np.inf while True: if marg == 'v': logF = self.E_np_h(x) elif marg == 'h': logF = self.E_np_v(x) # print ''.join([str(xi) for xi in x]), logF, logZ logZ = np.logaddexp(logZ, logF) if not inc(x): break # print return logZ
class EarlyStoppingRBM:
    """
    Adaptation of the `BernoulliRBM` class of sklearn to add the ability to
    stop early when training does not improve.

    Parameters
    ----------
    n_components : int, optional
        The size of the output, default `256`
    batch_size : int, optional
        The batch size of the rbm, default `100`
    lr : float, optional
        The learning rate of the rbm, default `0.01`
    patience : int, optional
        The number of epochs without improvement before training stops, default `3`
    epochs : int, optional
        The maximum number of epochs, default `1000`
    verbose : int, optional
        The verbosity of the rbm, default `0`

    Attributes
    ----------
    rbm : BernoulliRBM
        the rbm to be trained
    """

    def __init__(self, n_components=256, batch_size=100, lr=0.01,
                 patience=3, epochs=1000, verbose=0):
        self.rbm = BernoulliRBM(n_components=n_components, n_iter=1,
                                batch_size=batch_size, learning_rate=lr,
                                verbose=verbose)
        self.patience = patience
        self.epochs = epochs
        self.verbose = verbose

    def fit(self, data):
        """
        Fit the rbm to the given data

        Parameters
        ----------
        data : array
            Data to be fitted
        """
        self.rbm.fit(data)
        min_likelihood = np.mean(
            [np.mean(self.rbm.score_samples(data)) for _ in range(5)])
        last_likelihood = min_likelihood
        min_index = 0
        for i in range(1, self.epochs):
            if min_index + self.patience > i:
                if self.verbose:
                    print('Epoch {}/{}'.format(i + 1, self.epochs))
                self.rbm.fit(data)
                last_likelihood = np.mean(
                    [np.mean(self.rbm.score_samples(data)) for _ in range(5)])
                if last_likelihood < min_likelihood:
                    min_likelihood = last_likelihood
                    min_index = i
            else:
                # no improvement within `patience` epochs: stop training
                break
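A minimal usage sketch with random binary data; real inputs should be rows of values in [0, 1], as expected by BernoulliRBM.

import numpy as np

rng = np.random.RandomState(0)
data = (rng.rand(500, 64) > 0.5).astype(np.float64)

es_rbm = EarlyStoppingRBM(n_components=32, batch_size=50, lr=0.05,
                          patience=3, epochs=200, verbose=1)
es_rbm.fit(data)
print(np.mean(es_rbm.rbm.score_samples(data)))  # final pseudo-log-likelihood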
def get_likelihood(data, W, vb, hb):
    rbm = BernoulliRBM(n_components=W.shape[1])
    rbm.components_ = W.T
    rbm.intercept_hidden_ = hb
    rbm.intercept_visible_ = vb
    return rbm.score_samples(data).mean()
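A minimal, self-contained sketch of calling this helper with randomly initialized parameters. Note that W is expected in visible-by-hidden orientation, since it is transposed into components_ (which scikit-learn stores as (n_components, n_features)); the shapes and values below are illustrative only.

import numpy as np

rng = np.random.RandomState(0)
n_visible, n_hidden = 16, 8
data = (rng.rand(200, n_visible) > 0.5).astype(np.float64)

W = 0.01 * rng.randn(n_visible, n_hidden)   # visible-by-hidden weight matrix
vb = np.zeros(n_visible)                    # visible biases
hb = np.zeros(n_hidden)                     # hidden biases

print(get_likelihood(data, W, vb, hb))      # mean pseudo-log-likelihood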