Example No. 1
0
                    clf.fit(X_train, y_train)

                elif alg == 'nb':
                    # Naive Bayes is trained on the original categorical labels,
                    # not the encoded ones used by the other classifiers.
                    y_train, y_test = y[train_index], y[test_index]
                    clf = GaussianNB()
                    clf.fit(X_train, y_train)

                else:
                    # NOTE(review): typo in the message ("unkown" -> "unknown") —
                    # fix separately; also, `clf` stays unbound here, so the
                    # predict call below would raise. Presumably `alg` is always
                    # one of the known values — TODO confirm.
                    print("unkown classifier " + alg)

                # Predict on the held-out fold; non-NB predictions come back
                # encoded and must be mapped back to the original labels.
                y_pred = clf.predict(X_test)
                if alg != 'nb':
                    y_pred = unencode(y_pred)
                scores.append(Score(y_test, y_pred))
            # Aggregate the per-fold scores and emit one CSV row:
            # dataset, algorithm, preprocessing, then accuracy and the
            # f1/precision/recall triplet for each sentiment class.
            avg_score = average_scores(scores)
            file.write(ds + "," + alg + "," + proc + "," +
                       str(avg_score.accuracy) + ", " +
                       str(avg_score.f1_positive) + "," +
                       str(avg_score.precision_positive) + "," +
                       str(avg_score.recall_positive) + "," +
                       str(avg_score.f1_neutral) + "," +
                       str(avg_score.precision_neutral) + "," +
                       str(avg_score.recall_neutral) + "," +
                       str(avg_score.f1_negative) + "," +
                       str(avg_score.precision_negative) + "," +
                       str(avg_score.recall_negative) + "\n")
            print("accuracy")
            print(avg_score.accuracy)

# Close the results CSV opened earlier (outside this excerpt).
file.close()
Example No. 2
0
"""

from sklearn.naive_bayes import MultinomialNB
from get_data import get_data_tfidf, get_data_custom
from Score import Score, average_scores
from sklearn.model_selection import KFold

# Evaluate a multinomial naive Bayes classifier with 10-fold cross-validation,
# recording a Score for both the held-out fold and the training fold so that
# train/test accuracy can be compared (a quick overfitting check).
X, y = get_data_custom("data-2_train.csv", 3, 2)

kf = KFold(n_splits=10)
kf.get_n_splits(X)
test_scores = []
train_scores = []

for fold_train, fold_test in kf.split(X):
    X_tr, X_te = X[fold_train], X[fold_test]
    y_tr, y_te = y[fold_train], y[fold_test]

    model = MultinomialNB()
    model.fit(X_tr, y_tr)

    # Score on the held-out fold, then on the data the model was fit on.
    test_scores.append(Score(y_te, model.predict(X_te)))
    train_scores.append(Score(y_tr, model.predict(X_tr)))

average_score = average_scores(test_scores)
print("Average test score: " + str(average_score.accuracy))

average_train_score = average_scores(train_scores)
print("Average train score: " + str(average_train_score.accuracy))
Example No. 3
0
from get_data import get_data_custom, one_hot_encode
from sklearn import tree
from sklearn.model_selection import KFold
from Score import Score, average_scores

print("Decision Tree:\n")

#you can replace this with whatver data getting method you want to try:
#file name, max gram length, min occurances of gram
X, y = get_data_custom('data-1_train.csv', 1, 1)

kf = KFold(n_splits=10)
kf.get_n_splits(X)
test_scores = []
for train_index, test_index in kf.split(X):
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]

    dec_tree = tree.DecisionTreeClassifier()
    dec_tree.fit(X_train, y_train)

    y_pred = dec_tree.predict(X_test)
    test_scores.append(Score(y_test, y_pred))

average_score = average_scores(test_scores)

for score in test_scores:
    print(score.accuracy)

print("average accuracy: " + str(average_score.accuracy))