Example #1
import numpy as np
from sklearn.neural_network import BernoulliRBM
from sklearn.utils import gen_even_slices

def run(train_data, test_data):
    batch_size = 10
    n_samples = np.array(train_data).shape[0]
    n_batches = int(np.ceil(float(n_samples) / batch_size))
    batch_slices = list(
        gen_even_slices(n_batches * batch_size, n_batches, n_samples=n_samples))

    nodes = [50, 75, 100, 150]

    for item in nodes:
        errors = []
        model = BernoulliRBM(n_components=item,
                             learning_rate=0.1,
                             batch_size=10,
                             n_iter=1,
                             random_state=None,
                             verbose=1)
        for _ in range(20):
            for batch_slice in batch_slices:
                model.partial_fit(train_data[batch_slice])
            errors.append(percent_error(model.gibbs(test_data), test_data))
        plot.plot_points(errors)
        plot.plot_heatmap(reformat_data(test_data[0]))
        plot.plot_heatmap(reformat_data(model.gibbs(test_data)[0]))

        if item == 50 or item == 100:
            plot.plot_heatmap(model.components_.reshape(item, 784))
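The helpers plot, percent_error, and reformat_data are project-local and not shown. A minimal sketch of percent_error, consistent with how it is called above (an assumed definition, not the project's actual helper):

import numpy as np

def percent_error(reconstruction, target):
    # Fraction of binary pixels that differ, expressed as a percentage
    # (assumed definition, matching the calls above).
    return 100.0 * np.mean(np.asarray(reconstruction) != np.asarray(target))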
Example #2
import numpy as np
from sklearn.datasets import load_digits
from sklearn.neural_network import BernoulliRBM
from sklearn.utils import assert_all_finite

# Module-level fixture as in scikit-learn's test_rbm.py: digits scaled to [0, 1].
Xdigits = load_digits().data
Xdigits -= Xdigits.min()
Xdigits /= Xdigits.max()

def test_gibbs_smoke():
    # Check that we don't get NaNs when sampling the full digits dataset.
    # Also check that sampling again yields different results.
    X = Xdigits
    rbm1 = BernoulliRBM(n_components=42, batch_size=40, n_iter=20, random_state=42)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
    X_sampled2 = rbm1.gibbs(X)
    assert np.all((X_sampled != X_sampled2).max(axis=1))
Example #3
def test_gibbs_smoke():
    """Check if we don't get NaNs sampling the full digits dataset.
    Also check that sampling again will yield different results."""
    X = Xdigits
    rbm1 = BernoulliRBM(n_components=42, batch_size=40, n_iter=20, random_state=42)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
    X_sampled2 = rbm1.gibbs(X)
    assert_true(np.all((X_sampled != X_sampled2).max(axis=1)))
Example #4
# train_in / test_in are assumed to be module-level arrays of binarized
# 28x28 MNIST images, flattened to 784-dimensional vectors.
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.neural_network import BernoulliRBM

def rbm(epochs, hidden, eta, graph_error_epoch_relation):
    print("Running the rbm...")

    if graph_error_epoch_relation:
        print("Graphing average error as a function of epoch...")
        error_list = []
        for i in range(epochs):
            rbm = BernoulliRBM(n_components=hidden, learning_rate=eta, batch_size=100,
                               n_iter=i, verbose=True, random_state=1)
            rbm.fit(train_in)

            total_error = 0
            for image in train_in:
                reconstruction = rbm.gibbs(image).astype(int)
                error = mean_squared_error(image, reconstruction)
                total_error = total_error + error

            error_list.append(total_error / len(train_in))  # mean error per training image
        print(error_list)
        plt.figure("Epoch-Loss relation in RBM")
        plt.plot(error_list)


    print("Creating reconstructed images, using test data...")
    rbm = BernoulliRBM(n_components=hidden, learning_rate=eta, batch_size=100,
                       n_iter=epochs, verbose=True, random_state=1)
    rbm.fit(train_in)


    plt.figure("RBM mnist digits", figsize = (20, 4))
    samples = [18, 3, 7, 0, 2, 1,15 , 8, 6 ,5 ]
    for i, index in enumerate(samples):
        image = test_in[index]
        reconstruction = rbm.gibbs(image).astype(int)

        # display original
        ax = plt.subplot(2, 10, i + 1)
        plt.imshow(image.reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

        # display reconstruction
        ax = plt.subplot(2, 10, i + 1 + 10)
        plt.imshow(reconstruction.reshape(28, 28))
        plt.gray()
        ax.get_xaxis().set_visible(False)
        ax.get_yaxis().set_visible(False)

    plt.show()
    return rbm
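A hypothetical call, assuming train_in and test_in are the module-level arrays of binarized MNIST vectors noted above:

trained_rbm = rbm(epochs=20, hidden=100, eta=0.05, graph_error_epoch_relation=True)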
Example #5
def test_rbm_pcd_gibbs(x_test,
                       Whv,
                       bh,
                       bv,
                       p_target=0.5,
                       n_gibbs_steps=5000,
                       thinning=10,
                       burnin=20):
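    """Estimate, via Gibbs sampling, the conditional log-loss of the held-out
    target pixels given the clamped evidence pixels, in bits per target pixel."""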
    rbm = BernoulliRBM(n_components=Whv.shape[0], learning_rate=0.0)
    rbm.components_, rbm.intercept_hidden_, rbm.intercept_visible_ = Whv, bh, bv
    evidence_mask = np.random.binomial(
        1, p_target, x_test.shape)  # 1: evidence (clamped) pixel, 0: target pixel

    V = np.random.binomial(1, p_target, x_test.shape)
    V = x_test * evidence_mask + V * (1 - evidence_mask)
    prob1 = np.zeros_like(V, dtype=float)  # running sum of sampled target states
    count = 0
    for it in range(n_gibbs_steps):
        V = rbm.gibbs(V)
        V = x_test * evidence_mask + V * (1 - evidence_mask)
        if (it + 1) % thinning == 0 and it > burnin:
            prob1 += V
            count += 1
    prob1 /= count
    prob1_clipped = prob1.clip(1e-15, 1 - 1e-15)
    target_mask = 1 - evidence_mask
    logp = x_test * np.log(prob1_clipped) + (
        1 - x_test) * np.log(1 - prob1_clipped)
    logp *= target_mask
    return -logp.sum() / target_mask.sum() / np.log(2)
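A hedged usage sketch: Whv, bh and bv can be taken from a fitted scikit-learn RBM, matching the attributes the function assigns internally (x_train and x_test are placeholder binary arrays):

rbm = BernoulliRBM(n_components=64, n_iter=20, random_state=0).fit(x_train)
bits = test_rbm_pcd_gibbs(x_test, rbm.components_,
                          rbm.intercept_hidden_, rbm.intercept_visible_)

The final division by np.log(2) converts the average negative log-probability from nats to bits.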
Example #6
def test_gibbs_smoke():
    """ just seek if we don't get NaNs sampling the full digits dataset """
    rng = np.random.RandomState(42)
    X = Xdigits
    rbm1 = BernoulliRBM(n_components=42, batch_size=10,
                        n_iter=20, random_state=rng)
    rbm1.fit(X)
    X_sampled = rbm1.gibbs(X)
    assert_all_finite(X_sampled)
Example #7
class RBM:

    def __init__(self, images, n_components, learning_rate, batch_size, n_iter, random_state):
        """
        :param images: input data for the RBM neural network
        :param n_components: number of hidden units for the RBM neural network
        :param learning_rate: learning rate for the RBM neural network
        :param batch_size: batch size for the RBM neural network
        :param n_iter: number of iterations/epochs for the RBM neural network
        :param random_state: random state for the RBM neural network
        """
        self.images = images
        self.batch_size = batch_size
        self.epochs = n_iter
        self.x = 0
        self.rbm = BernoulliRBM(
            n_components=n_components,
            learning_rate=learning_rate,
            batch_size=batch_size,
            n_iter=self.epochs,
            random_state=random_state,
            verbose=1)

    def fit(self):
        """
        :return: void
        """
        self.x, _ = self.images.train.next_batch(self.batch_size)
        self.rbm.fit(self.x)

    def gibbs_sampling(self, k):
        """
        :param k: number of steps of Gibbs sampling
        :return: void
        """
        for _ in range(k):
            # one full Gibbs step; cast the boolean sample back to 0/1 values
            self.x = self.rbm.gibbs(self.x).astype(self.x.dtype)

    def generate_images(self, num_hidden_nodes):
        """
        :param num_hidden_nodes: number of hidden nodes/units
        :return: void
        """
        plt.figure(figsize=(6, 6))
        for i, comp in enumerate(self.x):
            plt.subplot(10, 10, i + 1)
            plt.imshow(comp.reshape((28, 28)), cmap="gray", interpolation='nearest')
            plt.xticks(())
            plt.yticks(())
        plt.suptitle("RBM reconstructed image with " + str(num_hidden_nodes) + " hidden nodes", fontsize=16)
        plt.subplots_adjust(wspace=0.1, hspace=0.1)
        plt.savefig("RBM reconstructed image with " + str(num_hidden_nodes) + " hidden nodes.png")
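A hypothetical usage sketch: the next_batch call in fit() implies that images follows the old tensorflow.examples.tutorials.mnist loader interface, so the import and data path below are assumptions:

from tensorflow.examples.tutorials.mnist import input_data

mnist = input_data.read_data_sets("MNIST_data/")
rbm = RBM(mnist, n_components=100, learning_rate=0.06,
          batch_size=100, n_iter=20, random_state=0)
rbm.fit()
rbm.gibbs_sampling(k=10)
rbm.generate_images(num_hidden_nodes=100)

Since generate_images lays the batch out on a 10x10 grid, batch_size should not exceed 100.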
Example #8
def test_gibbs():
    rng = np.random.RandomState(42)
    X = Xdigits[:100]
    rbm1 = BernoulliRBM(n_components=2, batch_size=5,
                        n_iter=5, random_state=rng)
    rbm1.fit(X)

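    # gibbs(v) samples the hidden units from v and then resamples the visibles,
    # so the mean over many gibbs() draws should match the mean of the explicit
    # two-step chain below.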
    Xt1 = np.mean([rbm1.gibbs(X[0]) for i in range(100)], 0)
    Xt2 = np.mean([rbm1._sample_visibles(rbm1._sample_hiddens(X[0], rng), rng)
                   for i in range(1000)], 0)

    assert_almost_equal(Xt1, Xt2, decimal=1)
Example #9
def test_fit_gibbs():
    # Gibbs on the RBM hidden layer should be able to recreate [[0], [1]]
    # from the same input
    rng = np.random.RandomState(42)
    X = np.array([[0.], [1.]])
    rbm1 = BernoulliRBM(n_components=2, batch_size=2,
                        n_iter=42, random_state=rng)
    # you need that many iterations
    rbm1.fit(X)
    assert_almost_equal(rbm1.components_,
                        np.array([[0.02649814], [0.02009084]]), decimal=4)
    assert_almost_equal(rbm1.gibbs(X), X)
    return rbm1
Example #10
def test_fit_gibbs_sparse():
    # Gibbs on the RBM hidden layer should be able to recreate [[0], [1]] from
    # the same input even when the input is sparse, and test against non-sparse
    rbm1 = test_fit_gibbs()
    rng = np.random.RandomState(42)
    from scipy.sparse import csc_matrix
    X = csc_matrix([[0.], [1.]])
    rbm2 = BernoulliRBM(n_components=2, batch_size=2,
                        n_iter=42, random_state=rng)
    rbm2.fit(X)
    assert_almost_equal(rbm2.components_,
                        np.array([[0.02649814], [0.02009084]]), decimal=4)
    assert_almost_equal(rbm2.gibbs(X), X.toarray())
    assert_almost_equal(rbm1.components_, rbm2.components_)
Example #11
class RBMModel(JointModel):

    def __init__(self, hyper_params, random=True, name=None):
        super(RBMModel, self).__init__(hyper_params, random, name)
        self.model = BernoulliRBM()

    def set_params(self, params):
        self.model = BernoulliRBM(**params)

    def evaluate(self, X):
        return self.model.score_samples(X).mean()

    def generate_samples(self, start, step=1):
        for i in range(step):
            start = self.model.gibbs(start)
        return start
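A minimal usage sketch; JointModel and its hyper_params contract are project-local, and X_train / X_seed are placeholder arrays:

model = RBMModel(hyper_params={}, name="rbm")
model.set_params({"n_components": 64, "learning_rate": 0.05, "n_iter": 20})
model.model.fit(X_train)
fantasy = model.generate_samples(X_seed, step=100)

Each gibbs call performs one full step of block Gibbs sampling, so step sets the length of the chain run from the starting point.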
Example #12
train_input = readData('bindigit_trn')
train_target = readData('targetdigit_trn')
test_input = readData('bindigit_tst')
test_target = readData('targetdigit_tst')

#create and train model
rbm = BernoulliRBM(n_components=50,
                   learning_rate=.2,
                   batch_size=100,
                   n_iter=20)
rbm.fit(train_input)  # BernoulliRBM is unsupervised; it takes no meaningful y

# first_number_index (defined elsewhere) holds, for each digit, the index of
# its first occurrence in the training set
original = [train_input[idx] for idx in first_number_index]

reconstructed_boolean = rbm.gibbs(np.asarray(original))
reconstructed = reconstructed_boolean.astype(float)  # boolean samples -> 0.0 / 1.0
for idx in range(10):
    plt.imsave('images/rbm_org_' + str(idx) + '.png',
               original[idx].reshape(28, 28),
               cmap=cm.gray)
    plt.imsave('images/rbm_rec_' + str(idx) + '.png',
               reconstructed[idx].reshape(28, 28),
               cmap=cm.gray)
Example #13
def make_rbm(data, components=50, n_iter=20):
    model = BernoulliRBM(n_components=components, learning_rate=0.1, batch_size=10,
                         n_iter=n_iter, random_state=None, verbose=0)
    model = model.fit(data)
    print("error: {}".format(percent_error(model.gibbs(data), data)))
    return model
Example #14
# data_handling, train_classifier, plot_images and calculate_error are
# project-local helpers.
import matplotlib.pyplot as plt
from sklearn import linear_model, metrics
from sklearn.neural_network import BernoulliRBM

def main():
    # Read data
    train, train_targets = data_handling.read_train_dataset()
    test, test_targets = data_handling.read_test_dataset()

    ## Models with different numbers of hidden nodes: 50, 75, 100, 150
    logistic_50 = linear_model.LogisticRegression()
    logistic_75 = linear_model.LogisticRegression()
    logistic_100 = linear_model.LogisticRegression()
    logistic_150 = linear_model.LogisticRegression()

    rbm_50 = BernoulliRBM(random_state=0, verbose=True)
    rbm_75 = BernoulliRBM(random_state=0, verbose=True)
    rbm_100 = BernoulliRBM(random_state=0, verbose=True)
    rbm_150 = BernoulliRBM(random_state=0, verbose=True)

    # Hyper-parameters:
    learning_rate = 0.06
    n_iter = 20
    # More components (hidden nodes) tend to give better prediction performance but a longer fitting time.

    error_50 = []
    error_75 = []
    error_100 = []
    error_150 = []

    #Training:
    classifier_50, rbm_50 = train_classifier(rbm_50,
                                             logistic_50,
                                             train,
                                             train_targets,
                                             learning_rate,
                                             n_iter,
                                             n_hnodes=50)
    classifier_75, rbm_75 = train_classifier(rbm_75,
                                             logistic_75,
                                             train,
                                             train_targets,
                                             learning_rate,
                                             n_iter,
                                             n_hnodes=75)
    classifier_100, rbm_100 = train_classifier(rbm_100,
                                               logistic_100,
                                               train,
                                               train_targets,
                                               learning_rate,
                                               n_iter,
                                               n_hnodes=100)
    classifier_150, rbm_150 = train_classifier(rbm_150,
                                               logistic_150,
                                               train,
                                               train_targets,
                                               learning_rate,
                                               n_iter,
                                               n_hnodes=150)

    # Evaluation
    print("Evaluation:")
    print(
        "Logistic regression using RBM features with 50 hidden nodes:\n%s\n" %
        (metrics.classification_report(test_targets,
                                       classifier_50.predict(test))))

    print(
        "Logistic regression using RBM features with 75 hidden nodes:\n%s\n" %
        (metrics.classification_report(test_targets,
                                       classifier_75.predict(test))))
    print(
        "Logistic regression using RBM features with 100 hidden nodes:\n%s\n" %
        (metrics.classification_report(test_targets,
                                       classifier_100.predict(test))))
    print(
        "Logistic regression using RBM features with 150 hidden nodes:\n%s\n" %
        (metrics.classification_report(test_targets,
                                       classifier_150.predict(test))))

    # Plotting
    plot_images(rbm_50, rbm_75, rbm_100, rbm_150)

    # Predict test set
    # image from each digit
    example_digits_indexs = [
        18, 3, 7, 0, 2, 1, 14, 8, 6, 5
    ]  # indices in the test partition for the digits 0-9
    prediction_50 = rbm_50.gibbs(test).astype(int)
    prediction_75 = rbm_75.gibbs(test).astype(int)
    prediction_100 = rbm_100.gibbs(test).astype(int)
    prediction_150 = rbm_150.gibbs(test).astype(int)
    print(calculate_error(prediction_50, test))
    print(calculate_error(prediction_75, test))
    print(calculate_error(prediction_100, test))
    print(calculate_error(prediction_150, test))

    plt.figure(figsize=(20, 20))

    for index, i in enumerate(example_digits_indexs):
        plt.subplot(10, 5, 5 * index + 1)
        plt.imshow(test[i].reshape((28, 28)),
                   cmap=plt.cm.gray_r,
                   interpolation='nearest')
        plt.subplot(10, 5, 5 * index + 2)
        plt.imshow(prediction_50[i].reshape((28, 28)),
                   cmap=plt.cm.gray_r,
                   interpolation='nearest')
        plt.subplot(10, 5, 5 * index + 3)
        plt.imshow(prediction_75[i].reshape((28, 28)),
                   cmap=plt.cm.gray_r,
                   interpolation='nearest')
        plt.subplot(10, 5, 5 * index + 4)
        plt.imshow(prediction_100[i].reshape((28, 28)),
                   cmap=plt.cm.gray_r,
                   interpolation='nearest')
        plt.subplot(10, 5, 5 * index + 5)
        plt.imshow(prediction_150[i].reshape((28, 28)),
                   cmap=plt.cm.gray_r,
                   interpolation='nearest')
    plt.show()
Example #15
    # (snippet begins mid-loop: 'clone' is a preprocessed binary image built
    # earlier in the original script)
    samples.append(np.array(clone).flatten())
    print(i)
samples = np.array(samples)
print(samples.shape)
random.shuffle(samples)
samples = np.array(samples)
train, test = samples[:1000], samples[1000:]

# Initializing the RBM and training it
epochs = int(input("How many epochs would you like? "))
rbm = BernoulliRBM(learning_rate = 0.1, n_iter = epochs, n_components = 512)
rbm.fit(train)
print("done")

# Recreating image
recreate = rbm.gibbs(cflaw).reshape(79, 106, 8)
#recreate = rbm.gibbs(train[0]).reshape(79, 106, 8)
accuracy = list(recreate.flatten() - train[0])
print(str(round(100 * (accuracy.count(0) / recreate.size), 4)) + "%")  # recreate.size = 79*106*8 = 66992
image = []
for y in range(len(recreate)):
    col = []
    for x in range(len(recreate[y])):
        row = []
        for i in range(len(recreate[y][x])):
            if recreate[y][x][i]:
                row.append('1')
            else:
                row.append('0')
        col.append(int("".join(row), base=2))
    image.append(col)
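The nested bit-packing loop above can also be written as a single vectorized expression; a sketch with the same MSB-first convention as the string join:

import numpy as np

weights = 1 << np.arange(7, -1, -1)  # [128, 64, ..., 1]
image = (recreate.astype(int) * weights).sum(axis=2)  # shape (79, 106)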
Example #16
    # (snippet begins mid-function: train_data, test_data, n_exp and the x values
    # used for plotting are defined earlier in the original script)
    for dim in [50, 75, 100, 150]:
        print("[Info] # Hidden Node =", dim)
        model = BernoulliRBM(n_components=dim)
        model.set_params(learning_rate=0.02, random_state=1)
        RBM_error = []
        AE_error = []

        for n_iter in range(6):
            train_n_iter = (n_iter + 1) * 10
            model.set_params(n_iter=train_n_iter, verbose=False)
            print "Training RBM with n_iter =", train_n_iter, "and n_hidden =", dim, "..."
            model.fit(train_data)
            error_count = 0.
            for i in range(n_exp):
                # mean fraction of differing pixels: 784 pixels x 2000 test
                # images, averaged over the n_exp sampling runs
                error_count += np.sum(
                    np.abs(model.gibbs(test_data) - test_data)) / 784. / 2000. / n_exp
            RBM_error.append(error_count)
            print "RBM error with n_iter =", train_n_iter, ":", error_count
        print "RMB Error with dim =", dim, ":", RBM_error
        plt.plot(x, RBM_error, label="RBM {}".format(dim))

        print "Training AE with n_iter =", 10, "and n_hidden =", dim, "..."
        model = MLPClassifier(hidden_layer_sizes=(dim, ),
                              random_state=1,
                              learning_rate_init=0.003,
                              verbose=False,
                              max_iter=10,
                              warm_start=True)
        model.fit(train_data, train_data)
        AE_error.append(