# --- Evaluate the fitted Naive Bayes model on the i-th test fold ---
start = time.time()
y_test = []
y_pred = []
# Iterate tweets and gold labels in lockstep instead of indexing by range(len()).
# nb.predict also receives the gold target — presumably for the project's own
# logging/inspection; TODO confirm it does not influence the prediction.
for tweet, target in zip(data_test[i]["tweet"], data_test[i]["target"]):
    prediction = nb.predict(tweet, target)
    y_test.append(target)
    y_pred.append(prediction)
print("nb pred")
print(time.time() - start)

start = time.time()
# Overall (not per-class) metrics from the confusion matrix.
cm = ConfusionMatrix()
accuracy, precision, recall, fmeasure = cm.score(y_test, y_pred)

print("Stopwords")
print(stopwords)
print("\nRemoved Stopwords")
print(removed_words)
print(f"\nAccuracy : {accuracy}")
print(f"Precision : {precision}")
print(f"Recall : {recall}")
print(f"FMeasure : {fmeasure}")

# NOTE(review): retained commented-out Excel export from the original for reference.
# df = pd.DataFrame({'X':x_array,'Y':y_array,'L':l_array,'K-Fold':kfold_per_combination,'Accuracy':list_acc,'Precision':list_prec,'Recall':list_recall,'F-Measure':list_fmeasure,'Fold Accuracy':fold_accuracy,'Fold Precision':fold_precision,'Fold Recall':fold_recall,'Fold F-Measure':fold_fmeasure})
# print(df)
# df.to_excel(r'cobabarunih.xlsx', index = False, header=True)
# --- Train a multinomial NB with TF-IDF features, then score the i-th test fold per class ---
tfidf = weight.get_tf_idf_weighting()
idf = weight.get_idf()
nb = NBMultinomial()
nb.fit(new_cleaned_data, new_terms, data_train[i]["target"], stopwords, idf, tfidf)

# NOTE(review): y_test / y_pred are appended to but not reset in this chunk —
# assumes they are (re)initialized before it runs; verify against surrounding code.
# enumerate(zip(...)) replaces the range(len(...)) index loop.
for j, (tweet, target) in enumerate(zip(data_test[i]["tweet"], data_test[i]["target"])):
    print(f"Test ke {j}")
    prediction = nb.predict(tweet, target)
    y_test.append(target)
    y_pred.append(prediction)

cm = ConfusionMatrix()
accuracy, accuracy_each_class, precision_each_class, recall_each_class, fmeasure_each_class = cm.score(
    y_test, y_pred)

# Record per-class metrics. Index order 0/1/2 appears to be
# negative/neutral/positive (inferred from the *_neg/_net/_pos names — TODO confirm).
per_class_metrics = (
    ((acc_neg, acc_net, acc_pos), accuracy_each_class),
    ((prec_neg, prec_net, prec_pos), precision_each_class),
    ((recall_neg, recall_net, recall_pos), recall_each_class),
    ((fmeasure_neg, fmeasure_net, fmeasure_pos), fmeasure_each_class),
)
for destinations, values in per_class_metrics:
    for destination, value in zip(destinations, values):
        destination.append(value)

acc_per_fold.append(accuracy)