plt.plot(x,avg_values,label="average") plt.xlabel('Feature Index') plt.xticks(x,x) plt.ylabel('Accuracy [%]') plt.title('Subset of Features Excluding Feature X') plt.legend() plt.grid() plt.savefig("subset_excluding_feature") exit(0) config = Config() dataloader = Dataloader(config,generate_tfidf=False,feature_flag=True) x_train, y_train = dataloader.get_train_dataloader() # model = KNeighborsClassifier(n_neighbors=1001,weights='distance') # scores = cross_val_score(model, x_train[:,[0,1,2,5,6,8,9]], y_train, cv=10) knn_cv_dict = { 'model_name': 'KNN', 'k_list': [1,3,5,7,9,11,13,15,17,19,21,25,31,41,51,61,71,91,121,151,181,211,251,301,351,401,501,601,801,1001] } svm_cv_dict = { 'model_name': 'SVM', 'c_list': [0.5,1,2,4], 'kernel_list': ['linear', 'poly', 'rbf', 'sigmoid'] } decision_tree_cv_dict = {
from dataloader import Dataloader
from config import Config

# Not referenced by the live code below; kept so that anything importing
# these names from this module keeps working.
TEST_SIZE = 0.15
RANDOM_STATE = 42


def _run_all_models(x_train, x_test, y_train, y_test):
    """Run the Bernoulli, KNN and SVM routines on one train/test split.

    Calls ``training.bernoulli_model``, ``training.KNN_model`` and
    ``training.SVM_model``, in that order, each with the same four
    arguments.

    Args:
        x_train: Training inputs as returned by the dataloader.
        x_test: Test inputs as returned by the dataloader.
        y_train: Training labels as returned by the dataloader.
        y_test: Test labels as returned by the dataloader.
    """
    # NOTE(review): `training` is not imported in the visible part of this
    # file -- presumably it is imported elsewhere; confirm.
    training.bernoulli_model(x_train, x_test, y_train, y_test)
    training.KNN_model(x_train, x_test, y_train, y_test)
    training.SVM_model(x_train, x_test, y_train, y_test)


def main():
    """Run all three models on the default features, then with tfidf=True."""
    config = Config()
    dataloader = Dataloader(config)

    # First pass: the dataloader's default train/test data.
    x_train, y_train = dataloader.get_train_dataloader()
    x_test, y_test = dataloader.get_test_dataloader()

    # NOTE(review): "sarcstic" is misspelled in both headers; the strings are
    # left untouched in case the printed output is consumed elsewhere.
    print("Results with common sarcstic words: ")
    _run_all_models(x_train, x_test, y_train, y_test)

    # Second pass: the same loaders, requested with tfidf=True.
    x_train_tfidf, y_train_tfidf = dataloader.get_train_dataloader(tfidf=True)
    x_test_tfidf, y_test_tfidf = dataloader.get_test_dataloader(tfidf=True)

    print()
    print("Results with common sarcstic words + TF-IDF: ")
    _run_all_models(x_train_tfidf, x_test_tfidf, y_train_tfidf, y_test_tfidf)


if __name__ == "__main__":
    main()