Ejemplo n.º 1
0
def main(args):
    """Run the bigram classifier on the development set and print its scores.

    Reads ``args.training_dir`` and ``args.development_dir``, loads both
    datasets through ``reader.load_dataset``, predicts labels for the
    development set with ``nb.bigramBayes`` and prints the five values
    returned by ``compute_accuracies``.
    """
    # Modify stemming and lower case below. Note that our test cases may use
    # both settings of the two parameters.
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
        args.training_dir,
        args.development_dir,
        stemming=False,
        lower_case=False)

    # To score the unigram model instead, swap the call below for:
    #   predicted_labels = nb.naiveBayes(train_set, train_labels, dev_set)
    predicted_labels = nb.bigramBayes(train_set, train_labels, dev_set)
    accuracy, false_positive, false_negative, true_positive, true_negative = compute_accuracies(
        predicted_labels, dev_labels)
    print("Accuracy:", accuracy)
    print("False Positive", false_positive)
    # Label previously read "Fale Negative" (typo in the emitted text).
    print("False Negative", false_negative)
    print("True Positive", true_positive)
    print("True Negative", true_negative)
Ejemplo n.º 2
0
# Order in which the model parameters are printed and passed to
# nb.bigramBayes (positions 4, 5 and 6 of the call).
_PARAMETER_ORDER = (
    "unigram_smoothing_parameter",
    "bigram_smoothing_parameter",
    "bigram_lambda",
)


def _evaluate_bigram(args, unigram_smoothing_parameter,
                     bigram_smoothing_parameter, bigram_lambda):
    """Load the data, run ``nb.bigramBayes`` once and score the predictions.

    Returns the result of ``compute_accuracies`` unchanged; callers unpack it
    as (accuracy, false_positive, false_negative, true_positive,
    true_negative).
    """
    # Modify stemming and lower case below. Note that our test cases may use
    # both settings of the two parameters.
    # NOTE(review): the dataset is reloaded for every single evaluation, as
    # before. Loading once and reusing it would be cheaper, but is only safe
    # if nb.bigramBayes does not modify its inputs -- confirm before hoisting.
    train_set, train_labels, dev_set, dev_labels = reader.load_dataset(
        args.training_dir,
        args.development_dir,
        stemming=False,
        lower_case=False)
    predicted_labels = nb.bigramBayes(train_set, train_labels, dev_set,
                                      unigram_smoothing_parameter,
                                      bigram_smoothing_parameter,
                                      bigram_lambda)
    return compute_accuracies(predicted_labels, dev_labels)


def _tune_parameter(args, params, name, low, high, min_accuracy, max_accuracy,
                    max_iterations, accuracy_limit):
    """Sweep one parameter by repeatedly halving the interval [low, high].

    Each pass evaluates the model at ``low`` and at ``high`` (the other
    parameters come from ``params``), prints a progress report, then moves
    the endpoint with the lower accuracy to the midpoint and selects the
    other endpoint as the current choice.

    The loop runs while either accuracy is below ``accuracy_limit`` and stops
    once ``iteration`` exceeds ``max_iterations`` (so at most
    ``max_iterations + 1`` passes). ``min_accuracy`` / ``max_accuracy`` are the
    accuracies carried in from the caller; if both already meet the limit the
    loop body never runs and ``params[name]`` is returned unchanged.

    Returns:
        (selected value, min_accuracy, max_accuracy) after the last pass.
    """
    selected = params[name]
    trial = dict(params)  # work on a copy; the caller's dict is not touched
    iteration = 0
    while min_accuracy < accuracy_limit or max_accuracy < accuracy_limit:
        if iteration > max_iterations:
            break

        # Lower endpoint first, then upper endpoint.
        trial[name] = low
        min_accuracy = _evaluate_bigram(args, **trial)[0]
        trial[name] = high
        (max_accuracy, false_positive, false_negative, true_positive,
         true_negative) = _evaluate_bigram(args, **trial)

        print("Iteration:", iteration)
        for key in _PARAMETER_ORDER:
            if key == name:
                print("min_" + key + ":", low)
                print("max_" + key + ":", high)
            else:
                print(key + ":", params[key])
        print("min_Accuracy:", min_accuracy)
        print("max_Accuracy:", max_accuracy)
        # The confusion counts shown are those of the upper-endpoint run.
        print("False Positive:", false_positive)
        print("False Negative:", false_negative)
        print("True Positive:", true_positive)
        print("True Negative:", true_negative)

        if min_accuracy < max_accuracy:
            low += (high - low) / 2
            selected = high
        else:
            high -= (high - low) / 2
            selected = low
        iteration += 1

    return selected, min_accuracy, max_accuracy


def main(args):
    """Tune the bigram model's three parameters one after another.

    The stages run in the order bigram_lambda, unigram_smoothing_parameter,
    bigram_smoothing_parameter. Each stage sweeps its parameter over
    [0.0000001, 1.0] while the other two keep their current values, and the
    value selected by a stage is used by the stages that follow. Progress is
    printed on every pass; nothing is returned.

    The accuracies of one stage are carried into the next, so once both
    endpoint accuracies have reached ``accuracy_limit`` the remaining stages
    are skipped.
    """
    max_iterations = 10
    accuracy_limit = 0.87

    # Starting values; each is replaced by the value its stage selects.
    params = {
        "unigram_smoothing_parameter": 0.0625,
        "bigram_smoothing_parameter": 0.125,
        "bigram_lambda": 0.05,
    }
    # (parameter name, lower bound, upper bound) in tuning order.
    stages = (
        ("bigram_lambda", 0.0000001, 1.0),
        ("unigram_smoothing_parameter", 0.0000001, 1.0),
        ("bigram_smoothing_parameter", 0.0000001, 1.0),
    )

    min_accuracy = 0
    max_accuracy = 0
    for name, low, high in stages:
        params[name], min_accuracy, max_accuracy = _tune_parameter(
            args, params, name, low, high, min_accuracy, max_accuracy,
            max_iterations, accuracy_limit)