clf.fit(X_train, y_train) elif alg == 'nb': #need original categorical for naive bayes y_train, y_test = y[train_index], y[test_index] clf = GaussianNB() clf.fit(X_train, y_train) else: print("unkown classifier " + alg) y_pred = clf.predict(X_test) if alg != 'nb': y_pred = unencode(y_pred) scores.append(Score(y_test, y_pred)) avg_score = average_scores(scores) file.write(ds + "," + alg + "," + proc + "," + str(avg_score.accuracy) + ", " + str(avg_score.f1_positive) + "," + str(avg_score.precision_positive) + "," + str(avg_score.recall_positive) + "," + str(avg_score.f1_neutral) + "," + str(avg_score.precision_neutral) + "," + str(avg_score.recall_neutral) + "," + str(avg_score.f1_negative) + "," + str(avg_score.precision_negative) + "," + str(avg_score.recall_negative) + "\n") print("accuracy") print(avg_score.accuracy) file.close()
""" from sklearn.naive_bayes import MultinomialNB from get_data import get_data_tfidf, get_data_custom from Score import Score, average_scores from sklearn.model_selection import KFold X, y = get_data_custom("data-2_train.csv", 3, 2) kf = KFold(n_splits=10) kf.get_n_splits(X) test_scores = [] train_scores = [] for train_index, test_index in kf.split(X): X_train, X_test = X[train_index], X[test_index] y_train, y_test = y[train_index], y[test_index] gnb = MultinomialNB() gnb.fit(X_train, y_train) y_pred = gnb.predict(X_test) test_scores.append(Score(y_test, y_pred)) y_pred = gnb.predict(X_train) train_scores.append(Score(y_train, y_pred)) average_score = average_scores(test_scores) print("Average test score: " + str(average_score.accuracy)) average_train_score = average_scores(train_scores) print("Average train score: " + str(average_train_score.accuracy))
from get_data import get_data_custom, one_hot_encode
from Score import Score, average_scores
from sklearn import tree
from sklearn.model_selection import KFold

print("Decision Tree:\n")

# You can replace this with whatever data-getting method you want to try:
# arguments are file name, max gram length, minimum occurrences of a gram.
X, y = get_data_custom('data-1_train.csv', 1, 1)

folds = KFold(n_splits=10)
folds.get_n_splits(X)

# Train and evaluate a fresh decision tree on each of the 10 folds.
test_scores = []
for train_idx, test_idx in folds.split(X):
    X_train, X_test = X[train_idx], X[test_idx]
    y_train, y_test = y[train_idx], y[test_idx]
    clf = tree.DecisionTreeClassifier()
    clf.fit(X_train, y_train)
    test_scores.append(Score(y_test, clf.predict(X_test)))

average_score = average_scores(test_scores)
for fold_score in test_scores:
    print(fold_score.accuracy)
print(f"average accuracy: {average_score.accuracy}")