def run_multiple_voting():
    """Compare several weighting schemes on previously stored predictions.

    Predictions and true labels for both test sets are read back with
    ``load_preds``. Confusion matrices built from the first set act as the
    weights; the second set's predictions are then combined by ``voting``
    once per scheme. For every scheme a classification report is printed
    and a confusion matrix is stored.

    Meant to be used together with alternative_main, on predictions stored
    by main(), to find out which weighting method performs better.

    Returns:
        A 4-tuple of voted predictions, ordered as
        (Precision, CEN_Precision, CEN, Equal_Vote).
    """
    first_preds, first_labels = load_preds(1)
    second_preds, second_labels = load_preds(2)

    # One confusion matrix per classifier, built from the first test set.
    # NOTE(review): assumes load_preds returns the predictions in the order
    # Bayes, Proximity, Voting -- confirm against the code that stored them.
    classifier_names = ("Bayes", "Proximity", "Voting")
    confusionMatrices = [
        ConfusionMatrix(first_labels, first_preds[position], name)
        for position, name in enumerate(classifier_names)
    ]

    for matrix in confusionMatrices:
        matrix.store_cm()

    print("Individual confusion matrices created and stored!")

    # Only the first two classifiers (Bayes and Proximity) take part in the
    # vote; the "Voting" matrix is stored above but is not used as a voter.
    weightingInput = [
        [matrix, second_preds[position]]
        for position, matrix in enumerate(confusionMatrices[:2])
    ]

    # (scheme passed to voting(), name given to the resulting matrix)
    schemes = (
        ("Precision", "Precision"),
        ("CEN_Precision", "CEN_Precision"),
        ("CEN", "CEN"),
        ("Equal_Vote", "Equal"),
    )

    # Each stage runs for all schemes before the next stage begins.
    all_votes = [voting(weightingInput, scheme) for scheme, _ in schemes]

    for votes in all_votes:
        print(classification_report(second_labels, votes))

    final_matrices = [
        ConfusionMatrix(second_labels, votes, matrix_name)
        for votes, (_, matrix_name) in zip(all_votes, schemes)
    ]

    for matrix in final_matrices:
        matrix.store_cm()

    return tuple(all_votes)
# Example #2
# 0
def test_weighting():
    """Exercise weighting.py end to end on randomly generated predictions.

    Four stand-in classifiers each receive 12 random predictions drawn from
    the classes 0-2. Their confusion matrices and predictions are fed to
    ``voting`` under every scheme; a classification report is printed and a
    confusion matrix is stored for each scheme's result.
    """
    true_labels = (2, 0, 2, 2, 0, 1, 1, 2, 2, 0, 1, 2)

    weight_pairs = []
    for classifier_name in ("cls_1", "cls_2", "cls_3", "cls_4"):
        # A fresh label list per classifier, so no two matrices are handed
        # the same list object.
        yactual = list(true_labels)
        random_preds = np.random.randint(3, size=12)
        matrix = ConfusionMatrix(yactual, random_preds, classifier_name)
        weight_pairs.append([matrix, random_preds])

    # (scheme passed to voting(), name given to the resulting matrix)
    schemes = (
        ("Precision", "Precision_Voting"),
        ("CEN_Precision", "CEN_Precision_Voting"),
        ("CEN", "CEN_Voting"),
        ("Equal_Vote", "Equal_Voting"),
    )

    # Each stage runs for all schemes before the next stage begins.
    all_votes = [voting(weight_pairs, scheme) for scheme, _ in schemes]

    for votes in all_votes:
        print(classification_report(yactual, votes))

    final_matrices = [
        ConfusionMatrix(yactual, votes, matrix_name)
        for votes, (_, matrix_name) in zip(all_votes, schemes)
    ]

    for matrix in final_matrices:
        matrix.store_cm()
# Example #3
# 0
def main(tperc, seed, fpaths, weighting_type):
    """Run the LSTM on both test splits and evaluate one weighted vote.

    The input files are opened, parsed and split into a training set and
    two test sets. ``runLSTM`` is called on each test set, then
    ``run_multiple_voting`` supplies the votes for every weighting scheme
    and the one selected by ``weighting_type`` is evaluated against the
    labels of the second test set.

    NOTE: in this version the Bayes and Voting models and both
    ``store_preds`` calls are disabled. The LSTM predictions computed here
    are therefore not persisted by this function, even though the progress
    messages say they were, and ``run_multiple_voting`` is called without
    any of them.

    Args:
        tperc: Passed through to ``splitSets``.
        seed: Passed through to ``splitSets``.
        fpaths: Passed to ``openFiles``.
        weighting_type: "Precision", "CEN" or "CEN_Precision"; any other
            value selects the equal-vote result.
    """
    instances = parseFiles(openFiles(fpaths))
    # The training split is produced but not used while the trainable
    # models are disabled.
    _train_split, first_split, second_split = splitSets(tperc, seed, instances)

    # First test set
    print("We have a total of", len(first_split), len(second_split), "tweets")
    lstm_first = runLSTM(first_split)
    print(lstm_first)
    print("Predictions made for first test set!")

    # Labels are still collected, although nothing is stored for this set.
    _first_labels = [tweet.getLabel() for tweet in first_split]

    print("Stored predictions for first test set!")

    # Second test set (the LSTM result is not used further in this version)
    _lstm_second = runLSTM(second_split)

    print("Predictions made for second test set!")

    second_labels = [tweet.getLabel() for tweet in second_split]

    print("Stored predictions for second test set!")

    # Weighted votes for every scheme; pick the requested one.
    by_precision, by_cen_precision, by_cen, by_equal = run_multiple_voting()

    votes_by_scheme = {
        "Precision": by_precision,
        "CEN": by_cen,
        "CEN_Precision": by_cen_precision,
    }
    guesses = votes_by_scheme.get(weighting_type, by_equal)

    # Confusion matrix of the final model, written to a file
    matrix_name = "Final_Model_" + weighting_type
    final_matrix = ConfusionMatrix(second_labels, guesses, matrix_name)
    final_matrix.store_cm()
    print("Stored confusion matrix!")

    # Tweets of the second set together with guessed and true labels
    second_tweets = [tweet.getFullTweet() for tweet in second_split]
    store_new_labels(second_tweets, guesses, second_labels)
    print("Stored new predictions!")
def main(tperc, seed, fpaths, weighting_type=None):
    """Parse files, train the models, test them, weight and evaluate.

    The Proximity and Bayes models are trained on the training split and
    run on two test splits. Confusion matrices from the first split act as
    weights when the second split's predictions are combined by ``voting``.
    Predictions, confusion matrices and the final labels are stored through
    the project's storage helpers.

    Args:
        tperc: Passed through to ``splitSets``.
        seed: Passed through to ``splitSets``.
        fpaths: Passed to ``openFiles``.
        weighting_type: Scheme name handed to ``voting`` and appended to
            the final confusion matrix name. When omitted, a module-level
            ``weighting_type`` is used if one exists, otherwise
            "Equal_Vote".
    """
    if weighting_type is None:
        # The body used to read ``weighting_type`` as a free variable, which
        # raised NameError unless a module-level global of that name
        # happened to exist. Existing three-argument callers that relied on
        # such a global keep working; everyone else gets equal voting
        # instead of a crash after training has already finished.
        weighting_type = globals().get("weighting_type", "Equal_Vote")

    files = openFiles(fpaths)
    instances = parseFiles(files)
    train_set, test_set1, test_set2 = splitSets(tperc, seed, instances)

    # Initialize all models
    b = BayesEliminationModel()
    p = ProximityModel()

    print("Initialized all models!")

    # Train all models
    p.train(train_set)
    b.train(train_set)

    print("Trained all models!")

    # Run models on the first test set
    p_pred = p.batchTest(test_set1)
    b_pred = b.batchTest(test_set1)

    print("Predictions made for first test set!")

    # Store first set of predictions (order: Proximity, Bayes)
    test_set1_labels = [i.getLabel() for i in test_set1]
    store_preds([p_pred, b_pred], test_set1_labels, 1)

    print("Stored predictions for first test set!")

    # Confusion matrices of the first set; these become the voting weights
    p_cm = ConfusionMatrix(test_set1_labels, p_pred, "Proximity")
    b_cm = ConfusionMatrix(test_set1_labels, b_pred, "Bayes")

    for cm in (p_cm, b_cm):
        cm.store_cm()

    print("Individual confusion matrices created and stored!")

    # Run models on the second test set
    p_pred2 = p.batchTest(test_set2)
    b_pred2 = b.batchTest(test_set2)

    print("Predictions made for second test set!")

    # Store second set of predictions (same order as the first set)
    test_set2_labels = [i.getLabel() for i in test_set2]
    store_preds([p_pred2, b_pred2], test_set2_labels, 2)

    print("Stored predictions for second test set!")

    # Weight second set of results, using confusion matrices from first set
    weightingInput = [[p_cm, p_pred2], [b_cm, b_pred2]]

    guesses = voting(weightingInput, weighting_type)
    print("Voting done!")

    # Create confusion matrix for final model and store it in a file
    final_cm = ConfusionMatrix(test_set2_labels, guesses,
                               "Final_Model_" + weighting_type)
    final_cm.store_cm()
    print("Stored confusion matrix!")

    # Store second set of tweets and guesses
    test_set2_tweets = [t.getFullTweet() for t in test_set2]
    store_new_labels(test_set2_tweets, guesses, test_set2_labels)
    print("Stored new predictions!")