def get_train_data(perc, freq):
    """Load the training set with feature selection at percentile ``perc``.

    Prints the chosen percentile, then delegates to ``load_train_data`` on
    ``./data/train.txt`` with a word-count threshold of 5 and feature
    selection switched on.

    Args:
        perc: Feature-selection percentile; echoed with ``%d`` and passed
            through as the loader's ``percentile`` argument.
        freq: Passed through unchanged as the loader's ``freq`` argument.

    Returns:
        Whatever ``load_train_data`` returns for these settings.
    """
    print("Feature selection percentile: %d" % perc)
    loader_options = {
        "word_count_threshold": 5,
        "freq": freq,
        "feature_selection_flag": True,
        "percentile": perc,
    }
    return load_train_data("./data/train.txt", **loader_options)
Exemple #2
0
def test_bnbc():
    """Run the binomial NBC experiment for each configured percentile.

    For every percentile, prints a separator and the percentile, loads
    ``./data/train.txt`` with a word-count threshold of 2, ``freq=False``
    and feature selection enabled, then passes the ``"x"`` and ``"y"``
    entries of the loaded data to ``bnbc_func``.
    """
    print_title("Binomial NBC")
    separator = "=" * 20
    # Only the 40th percentile is evaluated here; the full sweep would be
    # 10, 20, ..., 100.
    for perc in (40,):
        print(separator)
        print("Feature selection percentile: %d" % perc)
        dataset = load_train_data(
            "./data/train.txt",
            word_count_threshold=2,
            freq=False,
            feature_selection_flag=True,
            percentile=perc,
        )
        features, labels = dataset["x"], dataset["y"]
        bnbc_func(features, labels)
Exemple #3
0
def test_entropy():
    """Run the max-entropy classifier experiment across percentiles 10..100.

    For every percentile in steps of 10, prints a separator and the
    percentile, loads ``./data/train.txt`` with a word-count threshold of 1,
    ``freq=False`` and feature selection enabled, then passes the ``'x'``
    and ``'y'`` entries of the loaded data to ``entropy_func``.
    """
    print_title("Max-Entropy Classifier")
    separator = "=" * 20
    # Full sweep: 10, 20, ..., 100 (a single-value run would use just 40).
    for perc in range(10, 101, 10):
        print(separator)
        print("Feature selection percentile: %d" % perc)
        dataset = load_train_data(
            "./data/train.txt",
            word_count_threshold=1,
            freq=False,
            feature_selection_flag=True,
            percentile=perc,
        )
        features, labels = dataset["x"], dataset["y"]
        entropy_func(features, labels)
Exemple #4
0
def test_svm(kernel):
    """Run the SVM classifier experiment across percentiles 10..100.

    For every percentile in steps of 10, prints a banner with the kernel and
    the ``freq`` setting, prints the percentile, loads ``./data/train.txt``
    with a word-count threshold of 1 and feature selection enabled, then
    passes the ``'x'`` and ``'y'`` entries of the loaded data to
    ``svm_func``.

    Args:
        kernel: Shown in the banner via ``%s`` and forwarded to ``svm_func``
            as its ``kernel`` keyword argument.
    """
    freq = False
    print_title("SVM Classifier")
    separator = "=" * 20
    # Full sweep: 10, 20, ..., 100 (a single-value run would use just 40).
    for perc in range(10, 101, 10):
        # One print call: separator line followed by the settings line.
        settings_line = "\nkernel: %s || freq: %s" % (kernel, freq)
        print(separator + settings_line)
        print("Feature selection percentile: %d" % perc)
        dataset = load_train_data(
            "./data/train.txt",
            word_count_threshold=1,
            freq=freq,
            feature_selection_flag=True,
            percentile=perc,
        )
        features, labels = dataset["x"], dataset["y"]
        svm_func(features, labels, kernel=kernel)