Example #1
0
def Qc(train_set, test_set, laplace=False):
    """Handles the tasks of question c

    Runs viterbi over every sentence of the (cleaned) test set and averages
    the per-sentence error values returned by calculate_error.

    Arguments:
        train_set -- training data; passed through clean_POS and then used
            to build S (initialize_S) and the Probabilities object
        test_set -- test data; each element is iterated as a sentence whose
            items hold the input at index 0 and the expected tag at index 1
            (presumably (word, tag) pairs -- confirm against the loader)

    Keyword Arguments:
        laplace {bool} -- are we to use Laplace smoothing or not (default: {False})

    Returns:
        list -- [gen_error, known_error, unknown_error], each the mean of
            the corresponding per-sentence values. known_error and
            unknown_error are float("nan") when no sentence contributed a
            value for them, since the average is undefined in that case.

    Raises:
        statistics.StatisticsError -- if the cleaned test set is empty, so
            that no general error was collected
    """
    train_set = clean_POS(train_set)
    test_set = clean_POS(test_set)
    S = initialize_S(train_set)
    probs = Probabilities(S, train_set=train_set, test_set=test_set)

    gen_error_vec = []
    known_error_vec = []
    unknown_error_vec = []
    for xy_tup in test_set:
        x = [t[0] for t in xy_tup]
        y = [t[1] for t in xy_tup]
        viterbi_tags = viterbi(x, probs, laplace)
        err_vec, known_0, unknown_0 = calculate_error(
            viterbi_tags, y, x, probs)
        gen_error_vec.append(err_vec[0])
        # The two flags tell us to skip this sentence for the known /
        # unknown averages (presumably because it contains no such words
        # -- confirm in calculate_error).
        if not known_0:
            known_error_vec.append(err_vec[1])
        if not unknown_0:
            unknown_error_vec.append(err_vec[2])

    gen_error = statistics.mean(gen_error_vec)
    # statistics.mean raises on empty data; these two lists are filled
    # conditionally, so an empty list is a legitimate outcome, not an error.
    known_error = (statistics.mean(known_error_vec)
                   if known_error_vec else float("nan"))
    unknown_error = (statistics.mean(unknown_error_vec)
                     if unknown_error_vec else float("nan"))
    return [gen_error, known_error, unknown_error]
Example #2
0
def Qe(train_set, test_set, laplace=False):
    """Handles tasks of question e

    Builds pseudo versions of the (cleaned) train and test sets, runs
    viterbi over every pseudo test sentence, and averages the general
    per-sentence error returned by calculate_error.

    Arguments:
        train_set -- training data; passed through clean_POS and then used
            to build S (initialize_S) and the Probabilities objects
        test_set -- test data; its pseudo version is iterated as sentences
            whose items hold the input at index 0 and the expected tag at
            index 1 (presumably (word, tag) pairs -- confirm against the
            loader)

    Keyword Arguments:
        laplace {bool} -- forwarded to viterbi; when true the confusion
            matrix is also printed (default: {False})

    Returns:
        The mean of the per-sentence general error (err_vec[0]).

    Side effects:
        Prints the mean error, and, when laplace is true, a DataFrame built
        from confusion_matrix(S, pseudo_probs).

    Raises:
        statistics.StatisticsError -- if the pseudo test set is empty
    """
    # "clean" the train and test sets from complex tags
    train_set = clean_POS(train_set)
    test_set = clean_POS(test_set)

    S = initialize_S(train_set)
    probs = Probabilities(S, train_set, test_set)
    # Generate pseudo train and test sets and probability object
    pseudo_train = probs.generate_pseudo_set(train_set)
    pseudo_test = probs.generate_pseudo_set(test_set)
    pseudo_probs = Probabilities(S, pseudo_train, pseudo_test)

    gen_error_vec = []
    for xy_tup in pseudo_test:
        x = [t[0] for t in xy_tup]
        y = [t[1] for t in xy_tup]
        viterbi_tags = viterbi(x, pseudo_probs, laplace)
        # NOTE(review): the error is computed against `probs` (built from
        # the non-pseudo sets), not `pseudo_probs`, with an extra positional
        # True flag -- confirm both against calculate_error's signature.
        err_vec, _, _ = calculate_error(viterbi_tags, y, x, probs, True)
        gen_error_vec.append(err_vec[0])
        # update confusion values
        pseudo_probs.update_confusion_matrix(y, viterbi_tags)

    gen_error = statistics.mean(gen_error_vec)
    print(gen_error)
    # print results and statistics
    if laplace:
        print(DataFrame(confusion_matrix(S, pseudo_probs)))
    return gen_error