def test_perceptron():
    """Train a Perceptron on two synthetic 2-D clusters and print its accuracy.

    500 standard-normal points are drawn per class; the class labelled -1 is
    shifted by (1, -1) and the class labelled 1 by (-1, 1).  The ground truth
    is plotted, the model is trained on the flattened data, and the predicted
    split is plotted together with the learned weights.  Accuracy is measured
    on the same points the model was trained on.
    """
    from models.perceptron import Perceptron

    samples_per_class = 500
    features = np.random.randn(2, samples_per_class, 2)
    targets = np.zeros([2, samples_per_class], dtype=int)

    # Row 0 holds the class labelled -1, row 1 the class labelled 1.
    class_layout = (([1, -1], -1), ([-1, 1], 1))
    for row, (offset, label) in enumerate(class_layout):
        features[row] += np.array(offset)
        targets[row] = label

    plot_scatter(features[0], features[1], 'Real')

    # Collapse the per-class axis so the model sees one flat sample list.
    flat_features = features.reshape(-1, 2)
    flat_targets = targets.flatten()

    perceptron = Perceptron(input_dim=2, lr=1e-4)
    train_perceptron(perceptron, flat_features, flat_targets, epochs=100)

    pred = perceptron.predict(flat_features)
    predicted_negative = flat_features[pred == -1]
    predicted_positive = flat_features[pred == 1]
    plot_scatter_with_line(predicted_negative, predicted_positive,
                           perceptron.weights, 'Pred')

    hits = np.sum(pred == flat_targets)
    acc = hits / len(pred)
    print(f'Acc = {100 * acc:.2f}%')
def main():
    """Run the Naive Bayes, perceptron and gradient-descent experiments.

    Loads the train/dev/test review datasets, builds feature vectors, then:

    * Part I   - fits a MultinomialNB baseline and reports its test accuracy,
                 plus the vocabulary size left after applying ``min_df=2``.
    * Part II  - trains the perceptron twice (``shuf=False`` / ``shuf=True``),
                 saves an accuracy figure for each run, and reports the test
                 accuracy of the unshuffled model.
    * Part III - trains three GD models with different (eta, lambda) pairs on
                 the first 10k training examples, saves accuracy and loss
                 figures, and reports test accuracy / average loss for the
                 first configuration.
    """

    def _finish_plot(x_label, y_label, title, out_path, **show_kwargs):
        # Label, save and display whatever is on the current figure.
        plt.xlabel(x_label)
        plt.ylabel(y_label)
        plt.title(title)
        plt.legend()
        plt.savefig(out_path)
        plt.show(**show_kwargs)

    # Load datasets.
    train_dataset, dev_dataset, test_dataset = [
        load_dataset(path)
        for path in (
            "data/truecased_reviews_train.jsonl",
            "data/truecased_reviews_dev.jsonl",
            "data/truecased_reviews_test.jsonl",
        )
    ]

    # ------------------------------------------------------------------
    # Part I: Feature Engineering
    # ------------------------------------------------------------------
    # Step 1. Build feature vectors for all three splits in one call.
    train_vecs, dev_vecs, test_vecs = compute_features(
        train_dataset, dev_dataset, test_dataset)

    for sign, value in (("+1", 1), ("-1", -1)):
        print(f"Proportion of {sign} label: ",
              np.mean(train_dataset.y == value))

    # Step 2. Fit a Naive Bayes baseline on the training split.
    nb_classifier = MultinomialNB()
    nb_classifier.fit(train_vecs, train_dataset.y)

    # Step 3. Compare predicted and actual labels on the test split.
    nb_test_preds = nb_classifier.predict(test_vecs)
    nb_test_acc = compute_average_accuracy(nb_test_preds, test_dataset.y)
    print("Test Accuracy = ", nb_test_acc)

    # Question 1(e): vocabulary size remaining with min_df=2.
    pruned_vectorizer = CountVectorizer(
        tokenizer=no_tokenizer, lowercase=False, binary=True, min_df=2)
    pruned_train_matrix = pruned_vectorizer.fit_transform(train_dataset.X)
    print("Remaining vocabulary size = ", pruned_train_matrix.shape[1])

    # ------------------------------------------------------------------
    # Part II: Perceptron Algorithm
    # ------------------------------------------------------------------
    num_epochs = 20
    num_features = train_vecs.shape[1]
    averaged = False  # only MSc students should need to touch this!

    # (title/file tag, shuf flag, kwargs forwarded to plt.show).
    # The first figure is shown without blocking so the second run can start.
    # NOTE(review): the figure is not closed between runs, so unless
    # Perceptron.train() opens a new figure the shuffled curves may be drawn
    # onto the same axes as the original ones - confirm.
    perceptron_runs = (
        ("original", False, {"block": False}),
        ("shuffled", True, {}),
    )
    trained_perceptrons = {}
    for variant, shuffle, show_kwargs in perceptron_runs:
        # Step 1. Initialise model with hyperparameters.
        model = Perceptron(num_epochs, num_features, averaged, shuf=shuffle)

        # Step 2. Train model.  legend() is called straight afterwards, so
        # train() presumably draws the accuracy curves itself - confirm.
        print(f"Training model for {num_epochs} epochs")
        model.train(train_vecs, train_dataset.y, dev_vecs, dev_dataset.y)
        _finish_plot(
            "Epochs",
            "Accuracy",
            f"Perceptron Train & Dev Accuracy ({variant})",
            f"Perceptron ({variant}).jpg",
            **show_kwargs,
        )
        trained_perceptrons[variant] = model

    # Step 3. Test-set performance of the model trained without shuffling.
    perceptron = trained_perceptrons["original"]
    test_preds = perceptron.predict(test_vecs)
    test_accuracy = compute_average_accuracy(test_preds, test_dataset.y)
    print("\nTest accuracy is: ", test_accuracy)

    # ------------------------------------------------------------------
    # Part III: Gradient Descent
    # ------------------------------------------------------------------
    max_iter = 20
    num_features = train_vecs.shape[1]

    # Step 1. Three (eta, lambda) combinations: step size and
    # regularisation strength.
    gd_settings = ((0.000015, 10), (0.000009, 10), (0.000003, 100))
    gd_models = [
        GD(max_iter=max_iter, num_features=num_features, eta=eta, lam=lam)
        for eta, lam in gd_settings
    ]
    # Fixed-point formatting keeps the legend text as e.g. "eta=0.000015".
    gd_tags = [f"eta={eta:.6f}, lam={lam}" for eta, lam in gd_settings]
    iterations = range(max_iter)

    # Step 2. Train each model on the first 10k training examples and draw
    # its train/dev accuracy curves.
    print(f"\nTraining model for {max_iter} max_iter")
    for model, tag in zip(gd_models, gd_tags):
        model.train(train_vecs[:10000], train_dataset.y[:10000],
                    dev_vecs, dev_dataset.y)
        plt.plot(iterations, model.train_acc_list, label=f"Train, {tag}")
        plt.plot(iterations, model.dev_acc_list, label=f"Dev, {tag}")
    _finish_plot(
        "Iterations",
        "Accuracy",
        "Gradient Descent Train & Dev Accuracy",
        "GD Accuracy Curve.png",
    )

    # Loss curves for the same three models.
    for model, tag in zip(gd_models, gd_tags):
        plt.plot(iterations, model.train_loss_list, label=f"Train, {tag}")
        plt.plot(iterations, model.dev_loss_list, label=f"Dev, {tag}")
    _finish_plot(
        "Iterations",
        "Loss",
        "Gradient Descent Loss Curve",
        "GD Loss Curve.png",
    )

    # Step 4. Test-set performance using the first configuration, which the
    # original author selected as the best eta/lambda pair.
    linear_model = gd_models[0]
    test_preds, pred_avg_loss = linear_model.predict(test_vecs, test_dataset.y)
    test_acc = compute_average_accuracy(test_preds, test_dataset.y)
    print("Test accuracy = ", test_acc)
    print("Predicted Average Loss = ", pred_avg_loss)
# AND-gate truth table: the four input pairs and their expected outputs.
X = np.array([[0, 0], [0, 1], [1, 0], [1, 1]], dtype='float32')
y = np.array([[0], [0], [0], [1]], dtype='float32')

# Build the model on the full truth table and fit it.
model = Perceptron(data=X, labels=y, num_input=2)
model.fit(alpha=0.1, epochs=5000)

# Tabulate inputs, expected label and model prediction for every sample.
print('x1\tx2\tlabel\tprediction')
for i, sample in enumerate(X):
    print(f'{sample[0]}\t{sample[1]}\t{y[i, 0]}\t{model.predict(sample)[0][0]}')

# Decision boundary: solve w[0,0]*x1 + w[0,1]*x2 + b[0,0] = 0 for x2 over a
# range of x1 values.  Assumes model.w and model.b are 2-D (indexed [0, k]).
weights = model.w
bias = model.b
x_fit = np.linspace(-1, 2, 100)
y_fit = []
for x in x_fit:
    weighted_input = weights[0, 0] * x + bias[0, 0]
    y_fit.append(-weighted_input / weights[0, 1])

# Samples as points, boundary as a line, axes cropped around the unit square.
plt.scatter(X[:, 0], X[:, 1], s=100, color='blue', label='Samples')
plt.plot(x_fit, y_fit, color='red', label='Decision Boundary')
plt.xlim(-0.2, 1.2)
plt.ylim(-0.2, 1.2)
plt.xlabel('Feature $x_1$')
plt.ylabel('Feature $x_2$')