def run_multiple_voting():
    """Run tests on multiple weighting systems given stored predictions of the classifiers in main()

    To be used in conjunction with alternative_main to determine which
    weighting method performs better.

    Returns:
        Tuple of four prediction lists, one per weighting scheme:
        (Precision, CEN_Precision, CEN, Equal_Vote).
    """
    # Load predictions from all classifiers and actual labels for test_set_1
    preds1, actual1 = load_preds(1)
    # Load predictions from all classifiers and actual labels for test_set_2
    preds2, actual2 = load_preds(2)

    # Create confusion matrices for each classifier.
    # NOTE(review): main() stores predictions as [proximity, bayes] (see its
    # store_preds call), so index 0 is Proximity and index 1 is Bayes.  The
    # previous code had the two labels swapped and also referenced preds1[2]
    # ("Voting"), which is out of range for the two-classifier store that
    # main() writes — confirm against alternative_main if it stores more.
    p_cm = ConfusionMatrix(actual1, preds1[0], "Proximity")
    b_cm = ConfusionMatrix(actual1, preds1[1], "Bayes")
    # r_cm = ConfusionMatrix(actual1, preds1[2], "LSTM")
    confusionMatrices = [p_cm, b_cm]

    # Save individual confusion matrices to files
    for cm in confusionMatrices:
        cm.store_cm()
    print("Individual confusion matrices created and stored!")

    # Weight second set of results, using confusion matrices from first set.
    # Each pair matches a classifier's test-set-1 confusion matrix with its
    # test-set-2 predictions (same index in both stores).
    weightingInput = [
        [confusionMatrices[0], preds2[0]],
        [confusionMatrices[1], preds2[1]],
        # [confusionMatrices[2], b.batchTest(test_set2)],
        # [confusionMatrices[3], r.batchTest(test_set2)],
    ]

    # Get the weighted voting results, once per weighting scheme
    votes_p = voting(weightingInput, "Precision")
    votes_CEN_p = voting(weightingInput, "CEN_Precision")
    votes_CEN = voting(weightingInput, "CEN")
    votes_eq = voting(weightingInput, "Equal_Vote")

    # Check metrics
    print(classification_report(actual2, votes_p))
    print(classification_report(actual2, votes_CEN_p))
    print(classification_report(actual2, votes_CEN))
    print(classification_report(actual2, votes_eq))

    # Create final confusion matrices depending on votes
    p_cm = ConfusionMatrix(actual2, votes_p, "Precision")
    p_CEN_cm = ConfusionMatrix(actual2, votes_CEN_p, "CEN_Precision")
    CEN_cm = ConfusionMatrix(actual2, votes_CEN, "CEN")
    eq_cm = ConfusionMatrix(actual2, votes_eq, "Equal")

    # Store confusion matrices
    for final_cm in (p_cm, p_CEN_cm, CEN_cm, eq_cm):
        final_cm.store_cm()

    return votes_p, votes_CEN_p, votes_CEN, votes_eq
def test_weighting():
    """Test weighting.py

    Builds four random 3-class classifiers over a fixed ground-truth label
    vector, runs every weighting scheme on them, prints the resulting
    classification reports, and stores one confusion matrix per scheme.
    """
    # Fixed ground-truth labels shared by all four random classifiers
    yactual = [2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2]

    # Create confusion matrices for random classifiers, paired with the
    # predictions they were built from
    weight_pairs = []
    for idx in (1, 2, 3, 4):
        random_preds = np.random.randint(3, size=12)
        cm = ConfusionMatrix(yactual, random_preds, "cls_" + str(idx))
        weight_pairs.append([cm, random_preds])

    # Check that CEN score is being calculated
    # print(weight_pairs[0][0].get_CEN_score(), weight_pairs[1][0].get_CEN_score(),
    #       weight_pairs[2][0].get_CEN_score(), weight_pairs[3][0].get_CEN_score())

    # Get final votes based on pairs, one result per weighting scheme
    votes_p = voting(weight_pairs, "Precision")
    votes_CEN_p = voting(weight_pairs, "CEN_Precision")
    votes_CEN = voting(weight_pairs, "CEN")
    votes_eq = voting(weight_pairs, "Equal_Vote")

    # Check metrics
    for votes in (votes_p, votes_CEN_p, votes_CEN, votes_eq):
        print(classification_report(yactual, votes))

    # Create final confusion matrices depending on votes, then store each one
    for votes, scheme_name in (
        (votes_p, "Precision_Voting"),
        (votes_CEN_p, "CEN_Precision_Voting"),
        (votes_CEN, "CEN_Voting"),
        (votes_eq, "Equal_Voting"),
    ):
        ConfusionMatrix(yactual, votes, scheme_name).store_cm()
def main(tperc, seed, fpaths):
    """Parses files, trains the models, tests the models, creates the weights, makes predictions and evaluates results

    Args:
        tperc: training split proportion, forwarded to splitSets.
        seed: random seed, forwarded to splitSets.
        fpaths: paths of the input files to open and parse.
    """
    instances = parseFiles(openFiles(fpaths))
    train_set, test_set1, test_set2 = splitSets(tperc, seed, instances)

    # Initialize all models
    bayes = BayesEliminationModel()
    proximity = ProximityModel()
    print("Initialized all models!")

    # Train all models
    proximity.train(train_set)
    bayes.train(train_set)
    print("Trained all models!")

    # Run models and store first set of results
    proximity_preds1 = proximity.batchTest(test_set1)
    bayes_preds1 = bayes.batchTest(test_set1)
    print("Predictions made for first test set!")

    # Store first set of predictions (proximity first, bayes second)
    labels1 = [instance.getLabel() for instance in test_set1]
    store_preds([proximity_preds1, bayes_preds1], labels1, 1)
    print("Stored predictions for first test set!")

    # Get confusion matrices for first set of results
    confusionMatrices = [
        ConfusionMatrix(labels1, proximity_preds1, "Proximity"),
        ConfusionMatrix(labels1, bayes_preds1, "Bayes"),
    ]

    # Save individual confusion matrices to files
    for matrix in confusionMatrices:
        matrix.store_cm()
    print("Individual confusion matrices created and stored!")

    # Second set of predictions
    proximity_preds2 = proximity.batchTest(test_set2)
    bayes_preds2 = bayes.batchTest(test_set2)
    print("Predictions made for second test set!")

    # Store second set of predictions
    labels2 = [instance.getLabel() for instance in test_set2]
    store_preds([proximity_preds2, bayes_preds2], labels2, 2)
    print("Stored predictions for second test set!")

    # Weight second set of results, using confusion matrices from first set
    weightingInput = [
        [confusionMatrices[0], proximity_preds2],
        [confusionMatrices[1], bayes_preds2],
    ]

    # Get the weighting results; weighting_type is a module-level setting
    guesses = voting(weightingInput, weighting_type)
    print("Voting done!")
    # print(guesses)

    # Create confusion matrix for final model and store it in a file
    final_cm = ConfusionMatrix(labels2, guesses, "Final_Model_" + weighting_type)
    final_cm.store_cm()
    print("Stored confusion matrix!")

    # Store second set of tweets and guesses
    tweets2 = [tweet.getFullTweet() for tweet in test_set2]
    store_new_labels(tweets2, guesses, labels2)
    print("Stored new predictions!")