Example #1
import numpy as np


def test_perceptron():
    # plot_scatter, plot_scatter_with_line and train_perceptron are assumed to
    # be helper functions defined elsewhere in the same repository
    from models.perceptron import Perceptron

    # two Gaussian clusters of 500 points each, centred at (1, -1) and (-1, 1),
    # labelled -1 and +1 below
    x, y = np.random.randn(2, 500, 2), np.zeros([2, 500], dtype=int)
    x[0] += np.array([1, -1])
    x[1] += np.array([-1, 1])
    y[0] = -1
    y[1] = 1
    plot_scatter(x[0], x[1], 'Real')

    x = x.reshape(-1, 2)
    y = y.flatten()

    perceptron = Perceptron(input_dim=2, lr=1e-4)
    train_perceptron(perceptron, x, y, epochs=100)

    pred = perceptron.predict(x)
    plot_scatter_with_line(x[pred == -1], x[pred == 1], perceptron.weights, 'Pred')

    acc = np.sum(pred == y) / len(pred)
    print(f'Acc = {100 * acc:.2f}%')
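Example #1 relies on a Perceptron model and a train_perceptron helper defined elsewhere in that repository. Below is a minimal sketch of what they might look like, assuming the classic perceptron update rule for labels in {-1, +1}; the class layout and helper body are illustrative guesses, not the repository's actual code.

import numpy as np


class Perceptron:
    """Minimal perceptron; the bias is folded into the weight vector."""

    def __init__(self, input_dim, lr=1e-4):
        self.lr = lr
        self.weights = np.zeros(input_dim + 1)  # last entry acts as the bias

    def predict(self, x):
        # x: (n, input_dim); returns hard labels in {-1, +1}
        scores = x @ self.weights[:-1] + self.weights[-1]
        return np.where(scores >= 0, 1, -1)

    def update(self, x, y):
        # classic perceptron rule: adjust weights only on misclassified points
        if self.predict(x[None, :])[0] != y:
            self.weights[:-1] += self.lr * y * x
            self.weights[-1] += self.lr * y


def train_perceptron(model, x, y, epochs):
    for _ in range(epochs):
        for xi, yi in zip(x, y):
            model.update(xi, yi)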
Example #2
def main():
    # assumes module-level imports: numpy as np, matplotlib.pyplot as plt,
    # MultinomialNB and CountVectorizer from scikit-learn, and the local
    # helpers load_dataset, compute_features, no_tokenizer,
    # compute_average_accuracy, Perceptron and GD
    # load datasets
    train_dataset = load_dataset("data/truecased_reviews_train.jsonl")
    dev_dataset = load_dataset("data/truecased_reviews_dev.jsonl")
    test_dataset = load_dataset("data/truecased_reviews_test.jsonl")

    # Part I: Feature Engineering
    # Step 1. create feature vectors by calling compute_features() with all three datasets as parameters
    train_vecs, dev_vecs, test_vecs = compute_features(train_dataset,
                                                       dev_dataset,
                                                       test_dataset)

    print("Proportion of +1 label: ", np.mean(train_dataset.y == 1))
    print("Proportion of -1 label: ", np.mean(train_dataset.y == -1))

    # Step 2. Train a Naive Bayes classifier (scikit-learn's MultinomialNB)
    mnb = MultinomialNB()
    mnb.fit(train_vecs, train_dataset.y)  # fit model to train set

    # Step 3. Check performance
    prediction = mnb.predict(test_vecs)  # test model
    test_acc = compute_average_accuracy(
        prediction, test_dataset.y)  # compare actual and predicted labels
    print("Test Accuracy = ", test_acc)

    # Question 1(e)
    # vocabulary size remaining after dropping tokens that appear in fewer
    # than two documents (min_df=2)
    vectorizer_e = CountVectorizer(tokenizer=no_tokenizer,
                                   lowercase=False,
                                   binary=True,
                                   min_df=2)
    train_X_features_e = vectorizer_e.fit_transform(train_dataset.X)
    print("Remaining vocabulary size = ", train_X_features_e.shape[1])

    # Part II: Perceptron Algorithm
    # TODO: Implement the body of Perceptron.train() and Perceptron.predict()
    # parameters for the perceptron model
    num_epochs = 20
    num_features = train_vecs.shape[1]
    averaged = False  # only MSc students should need to touch this!

    # Step 1. Initialise model with hyperparameters
    perceptron = Perceptron(num_epochs, num_features, averaged, shuf=False)

    # Step 2. Train model
    print("Training model for {} epochs".format(num_epochs))
    # train model (original); train() is expected to plot the per-epoch
    # train/dev accuracy curves that the calls below label and save
    perceptron.train(train_vecs, train_dataset.y, dev_vecs, dev_dataset.y)
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")  # plot graph
    plt.title("Perceptron Train & Dev Accuracy (original)")
    plt.legend()
    plt.savefig('Perceptron (original).jpg')
    plt.show(block=False)

    # Repeat for shuffled datasets
    perceptron_shuf = Perceptron(num_epochs, num_features, averaged, shuf=True)
    print("Training model for {} epochs".format(num_epochs))
    perceptron_shuf.train(train_vecs, train_dataset.y, dev_vecs,
                          dev_dataset.y)  # train model (shuffled)
    plt.xlabel("Epochs")
    plt.ylabel("Accuracy")  # plot graph
    plt.title("Perceptron Train & Dev Accuracy (shuffled)")
    plt.legend()
    plt.savefig('Perceptron (shuffled).jpg')
    plt.show()

    # Step 3. Compute performance on test set
    test_preds = perceptron.predict(
        test_vecs)  # predict test set using unshuffled trained model
    test_accuracy = compute_average_accuracy(test_preds, test_dataset.y)
    print("\nTest accuracy is: ", test_accuracy)

    # Part III: Gradient Descent
    # TODO: Implement the body of GD.train() and GD.predict()

    # parameters for the gradient descent algorithm
    max_iter = 20
    num_features = train_vecs.shape[1]
    # eta (step size) and lambda (regularisation strength) are set per model
    # below; choose them wisely and double-check the CW instructions

    # Step 1. Initialise model with hyperparameters
    # three sets of combinations of eta and lambda
    linear_model = GD(max_iter=max_iter,
                      num_features=num_features,
                      eta=0.000015,
                      lam=10)
    linear_model2 = GD(max_iter=max_iter,
                       num_features=num_features,
                       eta=0.000009,
                       lam=10)
    linear_model3 = GD(max_iter=max_iter,
                       num_features=num_features,
                       eta=0.000003,
                       lam=100)

    # Step 2. Train model on a subset of the training set (first 10k examples)
    # train model with first set
    print("\nTraining model for {} max_iter".format(max_iter))
    linear_model.train(train_vecs[:10000], train_dataset.y[:10000], dev_vecs,
                       dev_dataset.y)
    plt.plot(range(max_iter),
             linear_model.train_acc_list,
             label="Train, eta=0.000015, lam=10")
    plt.plot(range(max_iter),
             linear_model.dev_acc_list,
             label="Dev, eta=0.000015, lam=10")

    # train model with second set
    linear_model2.train(train_vecs[:10000], train_dataset.y[:10000], dev_vecs,
                        dev_dataset.y)
    plt.plot(range(max_iter),
             linear_model2.train_acc_list,
             label="Train, eta=0.000009, lam=10")
    plt.plot(range(max_iter),
             linear_model2.dev_acc_list,
             label="Dev, eta=0.000009, lam=10")

    # train model with third set
    linear_model3.train(train_vecs[:10000], train_dataset.y[:10000], dev_vecs,
                        dev_dataset.y)
    plt.plot(range(max_iter),
             linear_model3.train_acc_list,
             label="Train, eta=0.000003, lam=100")
    plt.plot(range(max_iter),
             linear_model3.dev_acc_list,
             label="Dev, eta=0.000003, lam=100")

    plt.xlabel("Iterations")
    plt.ylabel("Accuracy")  # plot graph
    plt.title("Gradient Descent Train & Dev Accuracy")
    plt.legend()
    plt.savefig("GD Accuracy Curve.png")  # plot graph
    plt.show()

    # Step 3. Plot loss curves
    plt.plot(range(max_iter),
             linear_model.train_loss_list,
             label="Train, eta=0.000015, lam=10")
    plt.plot(range(max_iter),
             linear_model.dev_loss_list,
             label="Dev, eta=0.000015, lam=10")
    plt.plot(range(max_iter),
             linear_model2.train_loss_list,
             label="Train, eta=0.000009, lam=10")
    plt.plot(range(max_iter),
             linear_model2.dev_loss_list,
             label="Dev, eta=0.000009, lam=10")
    plt.plot(range(max_iter),
             linear_model3.train_loss_list,
             label="Train, eta=0.000003, lam=100")
    plt.plot(range(max_iter),
             linear_model3.dev_loss_list,
             label="Dev, eta=0.000003, lam=100")
    plt.xlabel("Iterations")
    plt.ylabel("Loss")  # plot graph
    plt.title("Gradient Descent Loss Curve")
    plt.legend()
    plt.savefig("GD Loss Curve.png")
    plt.show()

    # Step 4. Compute performance on test set
    test_preds, pred_avg_loss = linear_model.predict(
        test_vecs,
        test_dataset.y)  # use the model with the best eta and lambda
    test_acc = compute_average_accuracy(test_preds, test_dataset.y)
    print("Test accuracy = ", test_acc)
    print("Predicted Average Loss = ", pred_avg_loss)
Example #3
# Data for AND gate (the Perceptron class used here is assumed to be defined
# elsewhere; a sketch follows this example)
import numpy as np
import matplotlib.pyplot as plt

X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype='float32')
y = np.array([[0], [0], [0], [1]], dtype='float32')

# Define and train model
model = Perceptron(data=X, labels=y, num_input=2)
model.fit(alpha=0.1, epochs=5000)

# Print results
print('x1\tx2\tlabel\tprediction')
for i in range(X.shape[0]):
    print('{x1}\t{x2}\t{label}\t{prediction}'.format(x1=X[i, 0],
                                                     x2=X[i, 1],
                                                     label=y[i, 0],
                                                     prediction=model.predict(
                                                         X[i, :])[0][0]))

# Plot results
weights = model.w
bias = model.b

# decision boundary w0*x1 + w1*x2 + b = 0, solved for x2
x_fit, y_fit = np.linspace(-1, 2, 100), []
for x in x_fit:
    y_fit.append(-(weights[0, 0] * x + bias[0, 0]) / weights[0, 1])

plt.scatter(X[:, 0], X[:, 1], s=100, color='blue', label='Samples')
plt.plot(x_fit, y_fit, color='red', label='Decision Boundary')
plt.xlim(-0.2, 1.2)
plt.ylim(-0.2, 1.2)
plt.xlabel('Feature $x_1$')
plt.ylabel('Feature $x_2$')
plt.legend()
plt.show()
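The Perceptron class used by Example #3 is not shown either. Here is a minimal sketch consistent with how it is called above; the constructor arguments, the w/b shapes implied by weights[0, 1] and bias[0, 0], and the 0/1 step output indexed as predict(...)[0][0] are all inferred from the calls, and the original class may differ.

import numpy as np


class Perceptron:
    """Single-unit perceptron: w has shape (1, num_input), b has shape (1, 1),
    labels are 0/1 with a step activation."""

    def __init__(self, data, labels, num_input):
        self.X, self.y = data, labels
        self.w = np.zeros((1, num_input))
        self.b = np.zeros((1, 1))

    def predict(self, x):
        # x: (num_input,) or (n, num_input); returns 0/1 of shape (n, 1)
        x = np.atleast_2d(x)
        return (x @ self.w.T + self.b >= 0).astype('float32')

    def fit(self, alpha, epochs):
        for _ in range(epochs):
            for xi, yi in zip(self.X, self.y):
                error = yi - self.predict(xi)[0]
                # classic perceptron update scaled by the learning rate
                self.w += alpha * error * xi
                self.b += alpha * error

Since AND is linearly separable, this update converges long before the 5000-epoch limit used above, after which the learned w and b give the plotted decision boundary.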