コード例 #1
0
ファイル: main.py プロジェクト: saad486/FYP-II
def test():
    """Run 5-fold cross-validation of the Naive Bayes model on merge.csv.

    Each fold holds out a 200-row slice as the test set, trains on the
    remainder, and prints the averaged accuracy, precision, recall and
    f-score over all folds.
    """
    model = nb.NaiveBayesModel()
    final_df, df = model.extract('E:/DATA/Sem8/fyp/merge.csv')
    start = -200
    end = 0
    accuracy = []
    precision = []
    recall = []
    fscore = []
    stats = em.Evaluate()
    for fold in range(5):
        # BUG FIX: advance the fold window every iteration. The original
        # passed start+200 / end+200 but never updated start/end, so all
        # five "folds" evaluated the identical (0, 200) slice.
        start += 200
        end += 200
        df_test, df_train = split(final_df, start, end)
        print(df_train)
        li_clean_text = model.clean_data(df_train)
        uniqueWords = model.make_unique_li(li_clean_text)
        docVector = model.binary_docvector(final_df, uniqueWords)
        (df_WordGivenPI, df_WordGivenNoPi, Prob_PI, Prob_NoPI,
         numWordsInPI, numWordsInNoPI) = model.TrainModel(docVector, uniqueWords)
        predict_df, test_data = model.Predict(Prob_PI, Prob_NoPI, uniqueWords,
                                              df_WordGivenPI, df_WordGivenNoPi,
                                              numWordsInPI, numWordsInNoPI)
        TP, FN, TN, FP = stats.confusion_matrix(test_data, predict_df)
        accuracy.append(stats.Accuracy(TP, TN, FP, FN))
        precision.append(stats.Precision(TP, FP))
        recall.append(stats.Recall(TP, FN))
        fscore.append(stats.fScore(TP, FN, FP))
    print("accuracy = ", Average(accuracy))
    print("precison = ", Average(precision))
    print("recall = ", Average(recall))
    print("f-score = ", Average(fscore))
コード例 #2
0
def binary_naive_bayes():
    """Train a binary Naive Bayes classifier on a 70/30 train/test split
    and print its evaluation statistics."""
    model = nb.NaiveBayesModel()
    cleaner = cn.DataCLean()
    vectorizer = dv.DocumentVector()

    df_clean, uniqueWords = cleaner.Clean()
    # First 30% of the rows become the held-out test set.
    test_size = int(.3 * (df_clean['class'].count()))
    df_clean_test, df_clean_train = split(df_clean, 0, test_size)

    docVector = vectorizer.binary_docvector(df_clean_train, uniqueWords)
    (df_WordGivenPI, df_WordGivenNoPi, Prob_PI, Prob_NoPI,
     numWordsInPI, numWordsInNoPI) = model.TrainModel(docVector, uniqueWords)
    predict_df, test_data = model.predict(
        Prob_PI, Prob_NoPI, uniqueWords, df_WordGivenPI, df_WordGivenNoPi,
        numWordsInPI, numWordsInNoPI, df_clean_test, cleaner)

    print("--------------Binary Naive Bayes Accuracy Stats---------------------------")
    stats = em.Evaluate()
    TP, FN, TN, FP = stats.confusion_matrix(test_data, predict_df)
    print("Accuracy = ", stats.Accuracy(TP, TN, FP, FN))
    print("Precision = ", stats.Precision(TP, FP))
    print("Recall = ", stats.Recall(TP, FN))
    print("fScore = ", stats.fScore(TP, FN, FP))
    print("True Negative = ", stats.TrueNegative(TN, FP))
    print("---------------------------------------------------------------------")
コード例 #3
0
def binary_naive_bayes_kfold():
    """Run 5-fold cross-validation of the binary Naive Bayes classifier.

    Each fold holds out a 200-row slice of the extracted data as the test
    set, retrains on the remainder, and prints per-fold and averaged
    accuracy, precision, recall, f-score and true-negative rate.
    """
    model = nb.NaiveBayesModel()
    clean = cn.DataCLean()
    doc_vector = dv.DocumentVector()
    final_df, df = clean.extract(pathData)
    accuracy = []
    precision = []
    recall = []
    fscore = []
    true_neg = []
    stats = em.Evaluate()
    for fold in range(5):
        # Fold bounds computed directly from the fold index instead of the
        # original mutated start/end accumulators (which also required a
        # dead `count = 0` seed); rows [start, end) form the test slice.
        start = fold * 200
        end = start + 200
        df_test, df_train = split(final_df, start, end)
        li_clean_text, df_clean = clean.clean_data(df_train)
        uniqueWords = clean.make_unique_li(li_clean_text)
        docVector = doc_vector.binary_docvector(df_clean, uniqueWords)
        (df_WordGivenPI, df_WordGivenNoPi, Prob_PI, Prob_NoPI,
         numWordsInPI, numWordsInNoPI) = model.TrainModel(docVector, uniqueWords)
        predict_df, punc_df = model.predict(Prob_PI, Prob_NoPI, uniqueWords,
                                            df_WordGivenPI, df_WordGivenNoPi,
                                            numWordsInPI, numWordsInNoPI,
                                            df_test, clean)
        TP, FN, TN, FP = stats.confusion_matrix(punc_df, predict_df)
        accuracy.append(stats.Accuracy(TP, TN, FP, FN))
        precision.append(stats.Precision(TP, FP))
        recall.append(stats.Recall(TP, FN))
        fscore.append(stats.fScore(TP, FN, FP))
        true_neg.append(stats.TrueNegative(TN, FP))
    print(
        "---------------------------------------------------------------------"
    )
    print("Binary Naive Bayes wit k-fold Accuracy Stats")
    print("accuracy = ", accuracy)
    print("precison = ", precision)
    print("recall = ", recall)
    print("f-score = ", fscore)
    print("True Negative = ", true_neg)
    print("accuracy = ", Average(accuracy))
    print("precison = ", Average(precision))
    print("recall = ", Average(recall))
    print("f-score = ", Average(fscore))
    print("true negative = ", Average(true_neg))
コード例 #4
0
ファイル: main.py プロジェクト: saad486/FYP-II
def binary_naive_bayes():
    """Train a binary Naive Bayes classifier on Training.csv and print
    its accuracy, precision, recall and f-score."""
    model = nb.NaiveBayesModel()
    path = 'E:/DATA/Sem8/fyp/Training.csv'
    # Use the path variable instead of repeating the literal (the original
    # defined `path` but never used it).
    final_df, df = model.extract(path)
    li_clean_text = model.clean_data(final_df)
    uniqueWords = model.make_unique_li(li_clean_text)
    docVector = model.binary_docvector(final_df, uniqueWords)
    (df_WordGivenPI, df_WordGivenNoPi, Prob_PI, Prob_NoPI,
     numWordsInPI, numWordsInNoPI) = model.TrainModel(docVector, uniqueWords)
    predict_df, test_data = model.Predict(Prob_PI, Prob_NoPI, uniqueWords,
                                          df_WordGivenPI, df_WordGivenNoPi,
                                          numWordsInPI, numWordsInNoPI)

    print("--------------Naive Bayes Accuracy Stats---------------------------")
    stats = em.Evaluate()
    TP, FN, TN, FP = stats.confusion_matrix(test_data, predict_df)
    print("Accuracy = ", stats.Accuracy(TP, TN, FP, FN))
    print("Precision = ", stats.Precision(TP, FP))
    print("Recall = ", stats.Recall(TP, FN))
    print("fScore = ", stats.fScore(TP, FN, FP))
    print("---------------------------------------------------------------------")
コード例 #5
0
ファイル: main.py プロジェクト: saad486/FYP-II
def text_blob():
    """Train Naive Bayes on TextBlob polarity-weighted document vectors
    built from Training.csv, then print evaluation statistics."""
    model = nb.NaiveBayesModel()
    path = 'E:/DATA/Sem8/fyp/Training.csv'
    # Use the path variable instead of repeating the literal (the original
    # defined `path` but never used it).
    final_df, df = model.extract(path)
    corpus = model.text_concat(final_df)
    li_clean_text = model.clean_data(corpus)
    uniqueWords = model.make_unique_li(li_clean_text)
    docVector = model.DocVector(final_df, uniqueWords)
    # Re-weight the raw document vectors by TextBlob sentiment polarity.
    polarity_docVector = tb.text_blob(docVector, uniqueWords)
    print(polarity_docVector['bad'])
    (df_WordGivenPI, df_WordGivenNoPi, Prob_PI, Prob_NoPI,
     numWordsInPI, numWordsInNoPI) = model.TrainModel(polarity_docVector, uniqueWords)
    predict_df, test_data = model.Predict(Prob_PI, Prob_NoPI, uniqueWords,
                                          df_WordGivenPI, df_WordGivenNoPi,
                                          numWordsInPI, numWordsInNoPI)

    print("--------------Naive Bayes with Text Blob Accuracy Stats---------------------------")
    stats = em.Evaluate()
    TP, FN, TN, FP = stats.confusion_matrix(test_data, predict_df)
    print("Accuracy = ", stats.Accuracy(TP, TN, FP, FN))
    print("Precision = ", stats.Precision(TP, FP))
    print("Recall = ", stats.Recall(TP, FN))
    print("fScore = ", stats.fScore(TP, FN, FP))
    print("---------------------------------------------------------------------")