Example #1
def smoothing_eval():
    # "best" means the fewest incorrectly classified documents
    best_one = float("inf")
    best_two = 0
    bayes.multinomial_factor = 0.5
    smoothing_increment = 0.05
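    # sweep bayes.multinomial_factor upward in 0.05 steps, re-training and
    # logging the error count after each step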
    while True:
        best_two = best_one

        bayes.multinomial_factor += smoothing_increment

        classification_rules = bayes.train_nb(train_docs, train_labels)
        best_guess = bayes.classify_documents(eval_docs, classification_rules,
                                              bayes.unique_set(train_labels))
        error_cases, acc, incorrects, seperation = bayes.accuracy(
            eval_labels, best_guess, eval_docs)
        best_one = incorrects
        log_file = open("smoothing_eval.txt", "a+")
        log_file.write(
            str(acc) + " " + str(seperation) + " " + str(incorrects) + " " +
            str(bayes.multinomial_factor) + "\n")
        log_file.close()
        print(bayes.multinomial_factor)
        print(str(best_one) + " " + str(best_two))
        # stop once the error count is no longer improving
        if best_one >= best_two:
            break
Example #2
def access():
    totalAcc = 0.0
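    # average classification accuracy over 10 random train/test splits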
    for i in range(10):
        labels, samples = bayes.load_data(_filename)
        trainIdx, testIdx = bayes.splitData(samples.index, 0.7)
        dic = bayes.buildVocabularyList(samples)
        trainVecs, trainLabels, pw = bayes.buildVecsAndLabels(
            samples[trainIdx], labels[trainIdx], dic)
        spamPro, pamPro = bayes.bayesian(trainVecs, trainLabels, dic)
        acc = bayes.accuracy(samples[testIdx], labels[testIdx], dic, spamPro,
                             pamPro, pw)
        totalAcc += acc
        print('accuracy = %s' % acc)
    averageAcc = float(totalAcc) / 10
    print('average accuracy is %f' % averageAcc)
Example #3
def stopword_eval(stopwords):
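    # drop stopwords one at a time (cumulatively, starting from the end of the
    # list), re-train and evaluate after each removal, and log the result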
    for i in reversed(range(0, len(stopwords))):
        removed_item = stopwords[i]
        stopwords.remove(removed_item)

        classification_rules = bayes.train_nb(train_docs, train_labels)
        best_guess = bayes.classify_documents(eval_docs, classification_rules,
                                              bayes.unique_set(train_labels))
        error_cases, acc, incorrects, seperation = bayes.accuracy(
            eval_labels, best_guess, eval_docs)
        log_file = open("stopword_eval.txt", "a+")
        log_file.write(
            str(acc) + " " + str(incorrects) + " " + " ".join(stopwords) +
            "\n")
        log_file.close()
Example #4
def synonym_eval():
    # manually set the range's upper bound to the number of lines in synonyms.txt
    for i in range(0, 430):
        bayes.synonoyms = {}
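        # rebuild the synonym map from synonyms.txt, keeping only the lines
        # after index i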
        with open("synonyms.txt", 'r') as syn_file:
            raw_synonoyms = syn_file.readlines()
            for index, line in enumerate(raw_synonoyms):
                if index > i:
                    line = line.replace("\n", "").upper().split("\t")
                    lhs = line[1].split(",")
                    rhs = line[0]
                    for key in lhs:
                        bayes.synonoyms[key] = rhs
        classification_rules = bayes.train_nb(train_docs, train_labels)
        best_guess = bayes.classify_documents(eval_docs, classification_rules,
                                              bayes.unique_set(train_labels))
        error_cases, acc, incorrects, seperation = bayes.accuracy(
            eval_labels, best_guess, eval_docs)
        log_file = open("synonym_eval.txt", "a+")
        log_file.write(
            str(acc) + " " + str(incorrects) + " " +
            " ".join(json.dumps(bayes.synonoyms)) + "\n")
        log_file.close()
Example #5
    train_dinner_dict = bayes.fea_and_class(train_dinner)
    #---calculate the mean and cov according to class------------------------
    train_breakfast_summaries = bayes.normalsummarize(train_breakfast_dict)
    train_lunch_summaries = bayes.normalsummarize(train_lunch_dict)
    train_dinner_summaries = bayes.normalsummarize(train_dinner_dict)
    #---predict the class and get a list of classes--------------------------
    predictions = []
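    # the first element of each validation sample is the meal type; strip it
    # and predict with the matching per-meal model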
    for i in range(len(val_x_list)):
        if val_x_list[i][0] == "breakfast":
            del val_x_list[i][0]
            prediction = bayes.normalpredict(train_breakfast_dict,
                                             train_breakfast_summaries,
                                             val_x_list[i])
        if val_x_list[i][0] == "lunch":
            del val_x_list[i][0]
            prediction = bayes.normalpredict(train_lunch_dict,
                                             train_lunch_summaries,
                                             val_x_list[i])
        if val_x_list[i][0] == "dinner":
            del val_x_list[i][0]
            prediction = bayes.normalpredict(train_dinner_dict,
                                             train_dinner_summaries,
                                             val_x_list[i])
        predictions.append(prediction)
    #---calculate the accuracy using the validation labels and predictions---
    del val["Type"]
    val_array = np.array(val)
    val_list = val_array.tolist()
    accuracy += bayes.accuracy(val_list, predictions)
average_accuracy = accuracy / 1000.0
print(average_accuracy)
Example #6
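    # load the stopword and synonym lists into the bayes module before training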
    with open("stopwords.txt", 'r') as stop_file:
        bayes.filter = stop_file.readlines()
        for index, line in enumerate(bayes.filter):
            bayes.filter[index] = line.replace("\n", "").upper()
    with open("synonyms.txt", 'r') as syn_file:
        raw_synonoyms = syn_file.readlines()
        for index, line in enumerate(raw_synonoyms):
            line = line.replace("\n", "").upper().split("\t")
            lhs = line[1].split(",")
            rhs = line[0]
            for key in lhs:
                bayes.synonoyms[key] = rhs
    print(bayes.synonoyms)
    #log_file = open("stopword_eval.txt", "a+")

    classification_rules = bayes.train_nb(train_docs, train_labels)
    best_guess = bayes.classify_documents(eval_docs, classification_rules,
                                          bayes.unique_set(train_labels))
    error_cases, acc, incorrects, seperation = bayes.accuracy(
        eval_labels, best_guess, eval_docs)

    #log_file.write(str(acc) + " " + str(incorrects) + " " + " ".join(bayes.filter)  + "\n")
    #log_file.close()

    for error in error_cases:
        print(error)

    #print(len(error_cases))
    #smoothing_eval()