def logistic_regression(train_data, train_labels, test_data, test_labels):
    print(f'{LogisticRegression.__name__}:')

    # Create and train model
    lr_model = LogisticRegression(train_data.shape[1], eta=0.001, epochs=50)
    model = OneVersusRest(lr_model)
    model.train(train_data, train_labels)

    # Predict 2000 validation set samples and calculate accuracy
    test_data_2k = test_data[:len(test_labels)]
    test_pred = model.predict(test_data_2k)

    # Print metrics
    print('\nTest Accuracy: {:.02f}%\n'.format(
        100 * accuracy(test_pred, test_labels)))
    mat, classes = confusion_matrix(test_pred, test_labels)
    print('Precision:\n{}\n'.format(
        np.round(precision(test_pred, test_labels), 2)))
    print('Recall:\n{}\n'.format(np.round(recall(test_pred, test_labels), 2)))
    print('F1:\n{}\n'.format(np.round(f1_score(test_pred, test_labels), 2)))
    print('Confusion Matrix:')
    print(mat)

    # Predict 10000 test set samples and save predictions
    print('Predicting 10k samples...')
    test_pred = model.predict(test_data)
    save_predictions(logistic_regression.__name__, test_pred)
    print('Saved 10k predictions.\n')

def linear_svm(train_data, train_labels, test_data, test_labels):
    print(f'{LinearSVM.__name__}:')

    # Create and train model
    lsvm_model = LinearSVM(alpha=0.01, features=180)
    model = OneVersusRest(lsvm_model)
    model.train(train_data, train_labels)

    # Predict 2000 validation set samples and calculate accuracy
    test_data_2k = test_data[:len(test_labels)]
    test_pred = model.predict(test_data_2k)

    # Print metrics
    print('\nTest Accuracy: {:.02f}%\n'.format(
        100 * accuracy(test_pred, test_labels)))
    mat, classes = confusion_matrix(test_pred, test_labels)
    print('Precision:\n{}\n'.format(
        np.round(precision(test_pred, test_labels), 2)))
    print('Recall:\n{}\n'.format(np.round(recall(test_pred, test_labels), 2)))
    print('F1:\n{}\n'.format(np.round(f1_score(test_pred, test_labels), 2)))
    print('Confusion Matrix:')
    print(mat)

    # Predict 10000 test set samples and save predictions
    print('Predicting 10k samples...')
    test_pred = model.predict(test_data)
    save_predictions(linear_svm.__name__, test_pred)
    print('Saved 10k predictions.\n')

def nearest_neighbour(train_data, train_labels, test_data, test_labels):
    print(f'{NearestNeighbour.__name__}:')

    # Create and train model
    model = NearestNeighbour(5, dist=manhattan)
    model.train(train_data, train_labels)

    # Predict 2000 validation set samples and calculate accuracy
    test_data_2k = test_data[:len(test_labels)]
    test_pred = model.predict(test_data_2k)

    # Print metrics
    print('\nTest Accuracy: {:.02f}%\n'.format(
        100 * accuracy(test_pred, test_labels)))
    mat, classes = confusion_matrix(test_pred, test_labels)
    print('Precision:\n{}\n'.format(
        np.round(precision(test_pred, test_labels), 2)))
    print('Recall:\n{}\n'.format(np.round(recall(test_pred, test_labels), 2)))
    print('F1:\n{}\n'.format(np.round(f1_score(test_pred, test_labels), 2)))
    print('Confusion Matrix:')
    print(mat)

    # Predict 10000 test set samples and save predictions
    print('Predicting 10k samples...')
    test_pred = model.predict(test_data)
    save_predictions(nearest_neighbour.__name__, test_pred)
    print('Saved 10k predictions.\n')

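# The metrics block above is repeated verbatim in each classifier routine in
# this module. A hedged refactoring sketch (not in the original code): the
# shared reporting could live in one helper built from the same accuracy,
# precision, recall, f1_score and confusion_matrix functions this script
# already uses.
def print_metrics(test_pred, test_labels):
    print('\nTest Accuracy: {:.02f}%\n'.format(
        100 * accuracy(test_pred, test_labels)))
    mat, classes = confusion_matrix(test_pred, test_labels)
    print('Precision:\n{}\n'.format(
        np.round(precision(test_pred, test_labels), 2)))
    print('Recall:\n{}\n'.format(np.round(recall(test_pred, test_labels), 2)))
    print('F1:\n{}\n'.format(np.round(f1_score(test_pred, test_labels), 2)))
    print('Confusion Matrix:')
    print(mat)
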
def epoch_pass(batch, model, loss_fn, device):
    """
    Returns the confusion matrix and the loss between the predicted output and the target output.

    :param DataLoader batch: batch dict containing "input_ids", "attention_mask" and "encoded_genres" tensors
    :param nn.Module model: model mapping (input_ids, attention_mask) to logits
    :param loss_fn: loss function applied to the logits and targets
    :param str device: device the batch tensors are moved to
    :return: (confusion matrix, loss) for this batch
    """
    input_ids = batch["input_ids"].to(device)
    attention_mask = batch["attention_mask"].to(device)
    targets = batch["encoded_genres"].to(device)

    # Forward pass; threshold the logits into binary label predictions
    batch_logits = model(input_ids, attention_mask)
    predictions = binary_labeling(batch_logits, threshold=0.5, device=device)

    return confusion_matrix(predictions, targets, is_torch=True), loss_fn(batch_logits, targets)

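# A minimal evaluation-loop sketch (not part of the original code) showing one
# way epoch_pass might be driven: iterate a DataLoader under torch.no_grad(),
# accumulate the per-batch losses, and collect the per-batch confusion
# matrices (how those are merged depends on this project's confusion_matrix
# return type, so they are only collected here). Assumes `torch` is imported
# in this module.
def evaluate(loader, model, loss_fn, device):
    model.eval()
    total_loss = 0.0
    n_batches = 0
    batch_confusions = []
    with torch.no_grad():
        for batch in loader:
            conf, loss = epoch_pass(batch, model, loss_fn, device)
            batch_confusions.append(conf)
            total_loss += loss.item()
            n_batches += 1
    return batch_confusions, total_loss / max(n_batches, 1)
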
def main():
    np.random.seed(seed)
    train1_dataset, test1_dataset, comp1_dataset, train2_dataset, comp2_dataset = load_datasets()

    # # retrain models
    # feature_vector1, feature_vector2 = load_feature_vectors(train1_dataset, train2_dataset)
    # model1 = retrain_model1(train1_dataset, feature_vector1)
    # model2 = retrain_model2(train2_dataset, feature_vector2)

    # load models
    model1, model2 = load_trained_models(train1_dataset, train2_dataset)

    test1_pred_tags, test1_true_tags = model1.predict(test1_dataset.sentences, beam=1, tqdm_bar=False)
    test1_accuracy = model1.score_func(test1_pred_tags, test1_true_tags)
    test1_confusion_matrix, test1_tags_accuracy = metrics.confusion_matrix(train1_dataset.tags,
                                                                           test1_pred_tags,
                                                                           test1_true_tags)
    worst10_test1_confusion_matrix = test1_confusion_matrix.loc[list(test1_tags_accuracy.keys())[:10],
                                                                list(test1_tags_accuracy.keys())[:10]]
    worst10_test1_tags_accuracy = list(test1_tags_accuracy.items())[:10]
    print('test1_accuracy:', test1_accuracy)
    print('worst10_test1_tags_accuracy:', worst10_test1_tags_accuracy)
    print('worst10_test1_confusion_matrix:\n', worst10_test1_confusion_matrix)

    comp1_pred_tags = model1.predict(comp1_dataset.sentences, beam=5, tqdm_bar=False)[0]
    save_wtag(comp1_dataset, comp1_pred_tags, 1)
    comp2_pred_tags = model2.predict(comp2_dataset.sentences, beam=1, tqdm_bar=False)[0]
    save_wtag(comp2_dataset, comp2_pred_tags, 2)

    # insert paths to the comp1 and comp2 true tagged files (in .wtag format) to perform accuracy evaluation
    comp1_tagged_path = 'comp1_tagged.wtag'
    comp2_tagged_path = 'comp2_tagged.wtag'
    try:
        comp1_true_dataset = preprocess.Dataset(comp1_tagged_path)
        comp1_accuracy = model1.score_func(comp1_pred_tags,
                                           [sentence[1] for sentence in comp1_true_dataset.sentences])
        print(f'comp1_accuracy={comp1_accuracy}')
    except Exception:
        # skip evaluation when the tagged comp1 file is unavailable
        pass
    try:
        comp2_true_dataset = preprocess.Dataset(comp2_tagged_path)
        comp2_accuracy = model2.score_func(comp2_pred_tags,
                                           [sentence[1] for sentence in comp2_true_dataset.sentences])
        print(f'comp2_accuracy={comp2_accuracy}')
    except Exception:
        # skip evaluation when the tagged comp2 file is unavailable
        pass

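# Entry point (assumed, not shown in this excerpt): run the tagging pipeline
# when the script is executed directly.
if __name__ == '__main__':
    main()
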
def neural_net(train_data, train_labels, test_data, test_labels):
    print(f'{NeuralNetwork.__name__}:')

    # Create and train model
    model = NeuralNetwork([
        FlatDenseLayer((784, ), activation=tanh),
        FlatDenseLayer((100, ), activation=tanh),
        FlatDenseLayer((20, ), activation=tanh),
        FlatDenseLayer((10, ), activation=sigmoid),
    ], eta=0.01, batch_size=64, epochs=250)
    model.train(train_data, train_labels)

    # Predict 2000 validation set samples and calculate accuracy
    test_data_2k = test_data[:len(test_labels)]
    test_activations, test_pred = model.predict(test_data_2k)

    # Print metrics
    print('\nTest Accuracy: {:.02f}%\n'.format(
        100 * accuracy(test_pred, test_labels)))
    mat, classes = confusion_matrix(test_pred, test_labels)
    print('Precision:\n{}\n'.format(
        np.round(precision(test_pred, test_labels), 2)))
    print('Recall:\n{}\n'.format(np.round(recall(test_pred, test_labels), 2)))
    print('F1:\n{}\n'.format(np.round(f1_score(test_pred, test_labels), 2)))
    print('Confusion Matrix:')
    print(mat)

    # Predict 10000 test set samples and save predictions
    print('Predicting 10k samples...')
    test_activations, test_pred = model.predict(test_data)
    print(len(test_pred))
    save_predictions(neural_net.__name__, test_pred)
    print('Saved 10k predictions.\n')

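# Hedged usage sketch (not in the original script): the four classifier
# routines in this module share one signature, so a single driver can run
# them all on the same split. Assumes train_data/test_data are the flat
# feature arrays (e.g. 784-dimensional image vectors) the routines above
# already expect.
def run_all_classifiers(train_data, train_labels, test_data, test_labels):
    for classifier in (logistic_regression, linear_svm,
                       nearest_neighbour, neural_net):
        classifier(train_data, train_labels, test_data, test_labels)
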
        column = extra % 32
        base += (row + column * 32)
        predictions[base] = preds[i]
print("Done predicting character labels using CNN.")

# Compute character error rate and word error rate before error correction
print("PRE-ERROR CORRECTION")
print("Computing character error rate (CER)...")
cer = char_err_rate(predictions, kjv)
CM = confusion_matrix(predictions, kjv)
print(CM)
ax = sns.heatmap(CM, cmap="jet",
                 xticklabels=sorted(kjv.char_to_int.keys()),
                 yticklabels=sorted(kjv.char_to_int.keys()))
ax.set_title("Confusion Matrix")
plt.xlabel("Character 2")
plt.ylabel("Character 1")
plt.show()
print("Character error rate (CER): %.3f%%" % (cer * 100.0))

print("Computing word error rate (WER)...")
wer = word_err_rate(predictions, kjv)