def trainRBM(data, n_components, n_iter, batch_size, learning_rate=0.01):
    err = np.zeros((n_iter, 2))
    rbm = BernoulliRBM(verbose=True, batch_size=batch_size, random_state=1,
                       n_components=n_components)
    n_features = len(data['X'][0])
    rbm.learning_rate = learning_rate
    # Initialize the weight matrix with small (normally distributed) random
    # values, with hidden and visible biases initialized to 0. Note that
    # scikit-learn's fit() re-initializes components_ on every call, so this
    # manual initialization is overwritten once training starts.
    rbm.components_ = np.random.randn(n_components, n_features) * 0.1
    rbm.intercept_hidden_ = np.zeros((n_components,))
    rbm.intercept_visible_ = np.zeros((n_features,))
    # Train one epoch per fit() call so the reconstruction error can be
    # recorded after every epoch.
    rbm.n_iter = 1
    for i in range(n_iter):
        rbm.fit(data['X'])
        # One Gibbs step from the data; the squared reconstruction error is
        # averaged over all entries. Both columns are computed from the same
        # data here, so they differ only through sampling noise.
        test = rbm.gibbs(data['X'])
        train = rbm.gibbs(data['X'])
        err[i, 1] = np.sum((test - data['X']) ** 2) / (n_features * len(data['X']))
        err[i, 0] = np.sum((train - data['X']) ** 2) / (n_features * len(data['X']))
    return rbm, err

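# A minimal usage sketch for trainRBM (hypothetical demo, not part of the
# original code), assuming `data` is a dict holding a binary
# (n_samples, n_features) array under 'X'.
def _demo_trainRBM():
    rng = np.random.RandomState(0)
    data = {'X': (rng.rand(200, 64) > 0.5).astype(float)}
    rbm, err = trainRBM(data, n_components=32, n_iter=10, batch_size=10)
    print(err)  # per-epoch squared reconstruction error, columns [train, test]
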
def test_rbm_verbose():
    rbm = BernoulliRBM(n_iter=2, verbose=10)
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    try:
        rbm.fit(Xdigits)
    finally:
        sys.stdout = old_stdout

def test_gibbs_smoke():
    """Just check that we don't get NaNs sampling the full digits dataset."""
    rng = np.random.RandomState(42)
    X = Xdigits
    rbm1 = BernoulliRBM(n_components=42, batch_size=10,
                        n_iter=20, random_state=rng)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)

def test_transform():
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=16, batch_size=5,
                        n_iter=5, random_state=42)
    rbm1.fit(X)
    # transform() should return the hidden-unit activation probabilities
    Xt1 = rbm1.transform(X)
    Xt2 = rbm1._mean_hiddens(X)
    assert_array_equal(Xt1, Xt2)

def test_sample_hiddens():
    rng = np.random.RandomState(0)
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=2, batch_size=5,
                        n_iter=5, random_state=42)
    rbm1.fit(X)
    # Averaging many Bernoulli samples of the hidden units should approach
    # their mean activation.
    h = rbm1._mean_hiddens(X[0])
    hs = np.mean([rbm1._sample_hiddens(X[0], rng) for i in range(100)], 0)
    assert_almost_equal(h, hs, decimal=1)

def test_fit():
    X = Xdigits.copy()
    rbm = BernoulliRBM(n_components=64, learning_rate=0.1,
                       batch_size=10, n_iter=7, random_state=9)
    rbm.fit(X)
    print(rbm.components_)
    assert_almost_equal(rbm.score_samples(X).mean(), -21., decimal=0)
    # in-place tricks shouldn't have modified X
    assert_array_equal(X, Xdigits)

def main():
    # Load digits data from sklearn
    digits = datasets.load_digits()
    features = digits.data
    (n_cases, n_dims) = features.shape

    # Binarize digits: threshold at 65% of the gray-level range
    gray_max = numpy.max(features)
    gray_min = numpy.min(features)
    th = (gray_max - gray_min) * 0.65
    features_bin = (features > th).astype(int)

    # Initialize RBM structure and training parameters
    n_components = 256
    learning_rate = 0.1
    batch_size = 100
    n_iter = 100
    verbose = True

    # Declare RBM instance and generatively train it with the input data
    r = BernoulliRBM(n_components=n_components, learning_rate=learning_rate,
                     batch_size=batch_size, n_iter=n_iter, verbose=verbose,
                     random_state=None)
    r.fit(features_bin)

    # Pick a random digit and display it
    idx = numpy.random.randint(0, n_cases)
    inp = features_bin[idx, :]
    im_inp = numpy.reshape(inp, (8, 8))  # Original digit image
    matplotlib.pyplot.figure(1)
    matplotlib.pyplot.imshow(im_inp)
    matplotlib.pyplot.gray()
    matplotlib.pyplot.title('Input Image')
    matplotlib.pyplot.show()
    print(features_bin)

    # Reconstruct the input digit image features
    rec = reconstruct(r, inp)
    im_rec = numpy.reshape(rec, (8, 8))  # Reconstructed digit image
    matplotlib.pyplot.figure(2)
    matplotlib.pyplot.imshow(im_rec)
    matplotlib.pyplot.gray()
    matplotlib.pyplot.title('Reconstructed Image')
    matplotlib.pyplot.show()
    print(rec)

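# `reconstruct` is called in main() but not defined in this section. A
# minimal sketch, assuming one full Gibbs step (visible -> hidden -> visible)
# via scikit-learn's public gibbs() API; the helper's name and exact behavior
# are assumptions, not the original author's implementation.
def reconstruct(rbm, v):
    """Reconstruct a single binary feature vector with one Gibbs step."""
    v = numpy.asarray(v).reshape(1, -1)  # gibbs() expects a 2-D array
    return rbm.gibbs(v).ravel().astype(int)
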
def test_fit_gibbs():
    """Gibbs on the RBM hidden layer should be able to recreate [[0], [1]]
    from the same input.
    """
    rng = np.random.RandomState(42)
    X = np.array([[0.], [1.]])
    rbm1 = BernoulliRBM(n_components=2, batch_size=2,
                        n_iter=42, random_state=rng)
    # you need that many iterations
    rbm1.fit(X)
    assert_almost_equal(rbm1.components_,
                        np.array([[0.02649814], [0.02009084]]), decimal=4)
    assert_almost_equal(rbm1.gibbs(X), X)
    return rbm1

def test_consistency_val(self):
    # Two RBMs trained with the same config and validation data should end up
    # identical. (This targets an RBM variant whose fit() takes a validation
    # set; max_epoch and model_path are not scikit-learn parameters.)
    rbm1 = BernoulliRBM(max_epoch=2, model_path='test_rbm_1/', **self.rbm_config)
    rbm2 = BernoulliRBM(max_epoch=2, model_path='test_rbm_2/', **self.rbm_config)
    rbm1.fit(self.X, self.X_val)
    rbm2.fit(self.X, self.X_val)
    self.compare_weights(rbm1, rbm2)
    self.compare_transforms(rbm1, rbm2)
    # cleanup
    self.cleanup()

def test_fit_gibbs_sparse():
    """Gibbs on the RBM hidden layer should be able to recreate [[0], [1]]
    from the same input even when the input is sparse; also test against the
    non-sparse results.
    """
    rbm1 = test_fit_gibbs()
    rng = np.random.RandomState(42)
    from scipy.sparse import csc_matrix
    X = csc_matrix([[0.], [1.]])
    rbm2 = BernoulliRBM(n_components=2, batch_size=2,
                        n_iter=42, random_state=rng)
    rbm2.fit(X)
    assert_almost_equal(rbm2.components_,
                        np.array([[0.02649814], [0.02009084]]), decimal=4)
    assert_almost_equal(rbm2.gibbs(X), X.toarray())
    assert_almost_equal(rbm1.components_, rbm2.components_)

def test_sparse_and_verbose():
    """Make sure RBM works with sparse input when verbose=True."""
    old_stdout = sys.stdout
    sys.stdout = StringIO()
    from scipy.sparse import csc_matrix
    X = csc_matrix([[0.], [1.]])
    rbm = BernoulliRBM(n_components=2, batch_size=2, n_iter=1,
                       random_state=42, verbose=True)
    try:
        rbm.fit(X)
        s = sys.stdout.getvalue()
        # make sure output is sound
        assert_true(re.match(r"\[BernoulliRBM\] Iteration 1,"
                             r" pseudo-likelihood = -?(\d)+(\.\d+)?,"
                             r" time = (\d|\.)+s", s))
    finally:
        sys.stdout = old_stdout

def test_score_samples():
    """Test score_samples (pseudo-likelihood) method."""
    # Assert that pseudo-likelihood is computed without clipping.
    # See Fabian's blog, http://bit.ly/1iYefRk
    rng = np.random.RandomState(42)
    X = np.vstack([np.zeros(1000), np.ones(1000)])
    rbm1 = BernoulliRBM(n_components=10, batch_size=2,
                        n_iter=10, random_state=rng)
    rbm1.fit(X)
    assert_true((rbm1.score_samples(X) < -300).all())

    # Sparse vs. dense should not affect the output. Also test sparse input
    # validation.
    rbm1.random_state = 42
    d_score = rbm1.score_samples(X)
    rbm1.random_state = 42
    s_score = rbm1.score_samples(lil_matrix(X))
    assert_almost_equal(d_score, s_score)

    # Test numerical stability (#2785): would previously generate infinities
    # and crash with an exception. score_samples expects a 2-D array, hence
    # the wrapping list.
    with np.errstate(under='ignore'):
        rbm1.score_samples([np.arange(1000) * 100])
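
# For context on the test above: score_samples estimates a pseudo-likelihood
# proxy by flipping one randomly chosen bit per sample and comparing free
# energies. The helper below is a hypothetical sketch of that idea, not part
# of the test suite; it relies on scikit-learn's private _free_energy, so the
# exact call is an assumption about internals.
def _pseudo_likelihood_sketch(rbm, X, rng):
    X = np.asarray(X)
    n_samples, n_features = X.shape
    flip = rng.randint(0, n_features, n_samples)
    X_ = X.copy()
    X_[np.arange(n_samples), flip] = 1 - X_[np.arange(n_samples), flip]
    fe = rbm._free_energy(X)    # F(v) for the original samples
    fe_ = rbm._free_energy(X_)  # F(v~) with one bit flipped
    # n_features * log sigmoid(F(v~) - F(v))
    return n_features * -np.logaddexp(0, -(fe_ - fe))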