def Qc(train_set, test_set, laplace=False):
    """Handles the tasks of question c: tag the test set and report errors.

    Both sets are passed through ``clean_POS``; ``S`` is built from the
    cleaned training set with ``initialize_S`` and a ``Probabilities`` object
    is built from both cleaned sets. Every test sentence is then tagged with
    ``viterbi`` and scored with ``calculate_error``.

    Arguments:
        train_set -- training sentences, handed to ``clean_POS``
        test_set -- test sentences, handed to ``clean_POS``; each cleaned
            sentence is iterated as a sequence of pairs (presumably
            (word, tag) -- confirm against ``clean_POS``)

    Keyword Arguments:
        laplace {bool} -- are we to use Laplace smoothing or not; forwarded
            to ``viterbi`` (default: {False})

    Returns:
        list -- ``[gen_error, known_error, unknown_error]``: the mean over
            test sentences of the matching entry of the per-sentence error
            vector. An entry is ``nan`` when no sentence contributed to it
            (previously this raised ``statistics.StatisticsError``).
    """
    train_set = clean_POS(train_set)
    test_set = clean_POS(test_set)
    S = initialize_S(train_set)
    probs = Probabilities(S, train_set=train_set, test_set=test_set)

    gen_error_vec = []
    known_error_vec = []
    unknown_error_vec = []

    for xy_tup in test_set:
        # Split the sentence into the tagger input and the reference output.
        x = [t[0] for t in xy_tup]
        y = [t[1] for t in xy_tup]

        viterbi_tags = viterbi(x, probs, laplace)
        err_vec, known_0, unknown_0 = calculate_error(
            viterbi_tags, y, x, probs)

        gen_error_vec.append(err_vec[0])
        # The two flags mark sentences whose known / unknown error must be
        # skipped (presumably because the sentence has no such words --
        # confirm against calculate_error).
        if not known_0:
            known_error_vec.append(err_vec[1])
        if not unknown_0:
            unknown_error_vec.append(err_vec[2])

    # statistics.mean() raises on an empty sequence; the known/unknown
    # vectors can legitimately be empty, so report nan for those instead.
    return [
        statistics.mean(vec) if vec else float("nan")
        for vec in (gen_error_vec, known_error_vec, unknown_error_vec)
    ]
def Qe(train_set, test_set, laplace=False):
    """Handles tasks of question e: tag the pseudo-word test set.

    Both sets are passed through ``clean_POS`` and a ``Probabilities``
    object is built from them. That object generates pseudo versions of the
    train and test sets, from which a second ``Probabilities`` object is
    built. Every pseudo test sentence is tagged with ``viterbi`` using the
    pseudo probabilities, scored with ``calculate_error``, and recorded via
    ``update_confusion_matrix``.

    The mean error is always printed; when ``laplace`` is true the confusion
    matrix is printed as a ``DataFrame`` as well.

    Arguments:
        train_set -- training sentences, handed to ``clean_POS``
        test_set -- test sentences, handed to ``clean_POS``

    Keyword Arguments:
        laplace {bool} -- forwarded to ``viterbi``; also enables printing of
            the confusion matrix (default: {False})

    Returns:
        float -- mean over pseudo test sentences of the first entry of the
            per-sentence error vector, or ``nan`` when the pseudo test set
            is empty (previously this raised ``statistics.StatisticsError``).
    """
    # "clean" the train and test sets from complex tags
    train_set = clean_POS(train_set)
    test_set = clean_POS(test_set)
    S = initialize_S(train_set)
    probs = Probabilities(S, train_set, test_set)

    # Generate pseudo train and test sets and probability object
    pseudo_train = probs.generate_pseudo_set(train_set)
    pseudo_test = probs.generate_pseudo_set(test_set)
    pseudo_probs = Probabilities(S, pseudo_train, pseudo_test)

    gen_error_vec = []
    for xy_tup in pseudo_test:
        x = [t[0] for t in xy_tup]
        y = [t[1] for t in xy_tup]

        viterbi_tags = viterbi(x, pseudo_probs, laplace)
        # NOTE(review): tagging uses pseudo_probs but the error is scored
        # against the original probs object -- confirm this is intended.
        err_vec, _, _ = calculate_error(viterbi_tags, y, x, probs, True)
        gen_error_vec.append(err_vec[0])

        # update confusion values
        pseudo_probs.update_confusion_matrix(y, viterbi_tags)

    # statistics.mean() raises on an empty sequence; report nan instead.
    if gen_error_vec:
        gen_error = statistics.mean(gen_error_vec)
    else:
        gen_error = float("nan")

    # print results and statistics
    print(gen_error)
    if laplace:
        print(DataFrame(confusion_matrix(S, pseudo_probs)))
    return gen_error