Example #1
0
def combined_crossval(claim_ids, target, rootdist_matrix, tf_matrix, questionmark, folds=7, do_custom_folds=True):
    """Stack all feature blocks and print cross-validated scores per classifier.

    The root-distance matrix, the question-mark feature, the PPDB alignment
    feature (obtained from ``get_ppdb_alignment_feature()``) and ``tf_matrix``
    are joined column-wise, in that order, into one sparse matrix. The matrix
    is handed to ``plot_2D_data`` and then to four logistic-regression
    configurations, the SVM grid search and Naive Bayes.

    Args:
        claim_ids: Forwarded to ``cv_fold_generator`` to build custom folds.
        target: Labels passed to the plot and to every classifier.
        rootdist_matrix: Root-distance features; converted to CSR here.
        tf_matrix: Stacked unchanged as the last feature block.
        questionmark: Stacked unchanged as the second feature block.
        folds: Fold count given to ``cv_fold_generator``; also used directly
            as the cross-validation argument when ``do_custom_folds`` is false.
        do_custom_folds: When true, the generated folds replace ``folds``.

    Returns:
        None. All results are printed.
    """
    # Always generated (even if unused below) to keep the original call order.
    generated_folds = cv_fold_generator(claim_ids, folds)

    feature_blocks = (
        sparse.csr_matrix(rootdist_matrix),
        questionmark,
        sparse.csr_matrix(get_ppdb_alignment_feature()),
        tf_matrix,
    )
    combined_all = sparse.hstack(feature_blocks)
    plot_2D_data(combined_all, target)

    cv = generated_folds if do_custom_folds else folds

    print("Classifier: ", '[accuracy,', 'f1_macro,', 'recall_macro,', 'precision_macro]')

    # (printed label, penalty argument, multi-class argument) for each run.
    logreg_runs = (
        ("Logistic regression ovr L1: ", 'l1', 'ovr'),
        ("Logistic regression ovr L2: ", 'l2', 'ovr'),
        ("Logistic regression multiclass L1: ", 'l1', 'multinomial'),
        ("Logistic regression multiclass L2: ", 'l2', 'multinomial'),
    )
    for label, penalty, scheme in logreg_runs:
        print(label, logistic_regression(combined_all, target, cv, penalty, 1000000, scheme))

    print("SVM Cross-validation")
    svm_crossval_grid(combined_all, target, cv)

    # Naive Bayes is given a dense array rather than the sparse matrix.
    print("Naive Bayes: ", naive_bayes(combined_all.toarray(), target, cv))
Example #2
0
def naive_bayes(input_dict):
    """Return a Naive Bayes classifier wrapped in an output dictionary.

    Args:
        input_dict: Not read by this function; kept so the signature stays
            compatible with existing callers.

    Returns:
        dict: A single-entry dictionary whose ``'bayes_out'`` key holds the
        object returned by ``c.naive_bayes()``.
    """
    # NOTE(review): `c` is not defined inside this function; presumably it is
    # a module-level alias for the classification helpers -- confirm.
    return {'bayes_out': c.naive_bayes()}
Example #3
0
    # Add the review-length feature: X is replaced by the helper's return
    # value and printed for inspection.
    print("After adding length review feature")
    X = functions.add_length_review_feature(X, length_of_reviews)
    print(X)

    # Add the part-of-speech tag feature: features are created from the
    # reviews, merged into X by the helper, and X is printed again.
    print("After adding Part of Speech Tag feature")
    prp_list = functions.create_pos_features(reviews)
    X = functions.add_pos_feature(X, prp_list)
    print(X)

    # Every classifier below is called with the same X and scores. The result
    # is rebound to `classifier` each time, so only the last one is kept.

    # Logistic Regression
    # --------------------------------------------
    classifier = classification.logistic_regression(X, scores)

    # Naive Bayes
    # --------------------------------------------
    classifier = classification.naive_bayes(X, scores)

    # K Nearest Neighbors
    # --------------------------------------------
    classifier = classification.knearest_neighbors(X, scores)

    # Decision Trees
    # --------------------------------------------
    classifier = classification.decision_trees(X, scores)

    # Random Forests
    # --------------------------------------------
    classifier = classification.random_forest(X, scores)
if __name__ == '__main__':
    # Banner describing this run; emitted as the same four lines of output.
    print("\n".join((
        'Running Classifiers for dianping dataset',
        "Does not include extra features",
        "Using Bag of Words",
        '------------------------------------------',
    )))

    # Gather the stopwords and read labels/reviews through the dianping
    # helpers, then build the bag-of-words representation from them.
    stop = dianping.gather_stopwords()
    labels, reviews = dianping.read_chinese()
    BOW, vec = dianping.chinese_BOW(reviews, stop)

    # Run each classifier on the same BOW/labels pair, in this order:
    # logistic regression, naive Bayes, k nearest neighbors, decision trees,
    # random forests. Each function is looked up right before it is called,
    # and `classifier` is rebound every time, so it ends up holding the
    # random-forest result.
    for _classifier_name in (
        'logistic_regression',
        'naive_bayes',
        'knearest_neighbors',
        'decision_trees',
        'random_forest',
    ):
        classifier = getattr(classification, _classifier_name)(BOW, labels)
Example #5
0
def naive_bayes(input_dict):
    """Create a Naive Bayes learner from the supplied parameters.

    Args:
        input_dict: Mapping that must contain a ``'params'`` entry; its value
            is passed to ``classification.naive_bayes``.

    Returns:
        dict: ``{'learner': <result of classification.naive_bayes(params)>}``.

    Raises:
        KeyError: If ``input_dict`` has no ``'params'`` key.
    """
    # Read the parameters first so a missing key fails before anything else.
    params = input_dict['params']
    learner = classification.naive_bayes(params)
    return {'learner': learner}