def combined_crossval(claim_ids, target, rootdist_matrix, tf_matrix, questionmark, folds=7, do_custom_folds=True):
    """Stack all feature matrices and print cross-validation results per classifier.

    The root-distance, question-mark, PPDB-alignment and term-frequency
    features are horizontally stacked into one sparse matrix, plotted via
    ``plot_2D_data``, and then handed to each classifier helper in turn.
    Every result is printed; nothing is returned.

    :param claim_ids: passed to ``cv_fold_generator`` to build the custom folds.
    :param target: labels forwarded unchanged to every classifier helper.
    :param rootdist_matrix: root-distance features; converted to CSR here.
    :param tf_matrix: term-frequency features; stacked as given.
    :param questionmark: question-mark feature; stacked as given.
    :param folds: fold specification used when ``do_custom_folds`` is false;
        also the second argument to ``cv_fold_generator``.
    :param do_custom_folds: when true, the generated custom folds replace
        ``folds`` for all classifier runs.
    """
    # Generated unconditionally, even when the custom folds end up unused.
    custom_folds = cv_fold_generator(claim_ids, folds)

    feature_blocks = (
        sparse.csr_matrix(rootdist_matrix),
        questionmark,
        sparse.csr_matrix(get_ppdb_alignment_feature()),
        tf_matrix,
    )
    combined_all = sparse.hstack(feature_blocks)

    plot_2D_data(combined_all, target)

    cv = custom_folds if do_custom_folds else folds

    print("Classifier: ", '[accuracy,', 'f1_macro,', 'recall_macro,', 'precision_macro]')

    # (label, penalty, multi-class mode) for each logistic-regression run.
    # The fifth positional argument (1000000) is the same for all runs; its
    # meaning is defined by logistic_regression -- presumably the inverse
    # regularisation strength, TODO confirm.
    logreg_runs = (
        ("Logistic regression ovr L1: ", 'l1', 'ovr'),
        ("Logistic regression ovr L2: ", 'l2', 'ovr'),
        ("Logistic regression multiclass L1: ", 'l1', 'multinomial'),
        ("Logistic regression multiclass L2: ", 'l2', 'multinomial'),
    )
    for label, penalty, multi_class in logreg_runs:
        print(label, logistic_regression(combined_all, target, cv, penalty, 1000000, multi_class))

    print("SVM Cross-validation")
    svm_crossval_grid(combined_all, target, cv)

    # Naive Bayes is given a dense array rather than the sparse matrix.
    print("Naive Bayes: ", naive_bayes(combined_all.toarray(), target, cv))
def naive_bayes(input_dict):
    """Naive Bayes algorithm for classification.

    :param input_dict: accepted for interface compatibility; not read here.
    :return: dict with the single key ``'bayes_out'`` holding the value
        returned by ``c.naive_bayes()``.
    """
    return {'bayes_out': c.naive_bayes()}
# Add the length-of-each-review feature to the feature matrix.
print("After adding length review feature")
X = functions.add_length_review_feature(X, length_of_reviews)
print(X)

# Add the part-of-speech tag feature.
print("After adding Part of Speech Tag feature")
prp_list = functions.create_pos_features(reviews)
X = functions.add_pos_feature(X, prp_list)
print(X)

# Run every classifier on the augmented features, in this order:
# logistic regression, naive Bayes, k-nearest neighbours, decision trees,
# random forests. Each helper is looked up just before it is called, so a
# failure in one run surfaces at the same point as with sequential calls.
# `classifier` ends up bound to the result of the last run.
for _trainer_name in (
    "logistic_regression",
    "naive_bayes",
    "knearest_neighbors",
    "decision_trees",
    "random_forest",
):
    classifier = getattr(classification, _trainer_name)(X, scores)
if __name__ == '__main__':
    # Banner: one line per message, same text and order as separate prints.
    print(
        'Running Classifiers for dianping dataset',
        "Does not include extra features",
        "Using Bag of Words",
        '------------------------------------------',
        sep='\n',
    )

    # Load stopwords and the labelled reviews, then build the bag-of-words
    # representation from the reviews and stopwords.
    stop = dianping.gather_stopwords()
    labels, reviews = dianping.read_chinese()
    BOW, vec = dianping.chinese_BOW(reviews, stop)

    # Run every classifier on the bag-of-words features, in this order:
    # logistic regression, naive Bayes, k-nearest neighbours, decision
    # trees, random forests. Each helper is looked up just before its call.
    # `classifier` ends up bound to the result of the last run.
    for _trainer_name in (
        "logistic_regression",
        "naive_bayes",
        "knearest_neighbors",
        "decision_trees",
        "random_forest",
    ):
        classifier = getattr(classification, _trainer_name)(BOW, labels)
def naive_bayes(input_dict):
    """Create a Naive Bayes learner from the supplied parameters.

    :param input_dict: mapping that must contain a ``'params'`` entry; that
        entry is passed as the sole argument to ``classification.naive_bayes``.
    :return: dict with the single key ``'learner'`` holding the value
        returned by ``classification.naive_bayes``.
    """
    params = input_dict['params']
    learner = classification.naive_bayes(params)
    return {'learner': learner}