Ejemplo n.º 1
0
from src.classification import CommonClassification
from src.preprocessing import CommonPreprocessing
from src.read_data import DataReader

# Passed to common_preprocessing.smote_over_sampling() for every dataset in
# the loop below; presumably the SMOTE k-nearest-neighbours count -- confirm
# against CommonPreprocessing.
SMOTE_K_NEIGHBOURS = 3
# Location handed to DataReader. It is a relative path, so presumably it is
# resolved against the current working directory -- verify in DataReader.
DATA_DIR = "../data"

# Project helpers from the src package, created once and reused for every
# dataset processed by the loop below.
data_reader = DataReader(DATA_DIR)
common_classifier = CommonClassification()
common_preprocessing = CommonPreprocessing()

# Collection iterated by the loop below; each element is used there through
# its file_name attribute and its as_testable() method.
keel_data = data_reader.read_keel_dat_directory()

for data in keel_data:
    # initialize file names and params
    print("Current dataset: " + str(data.file_name))
    csv_export_file_name = "../results/smote_oversampling_k3/data-naive_bayes.csv"
    output_csv = open(csv_export_file_name, "a")
    confusion_matrix_plot_file_name = "../results/smote_oversampling_k3/confusion_matrix/%s-%s.%s" % (
        data.file_name, "naive_bayes", "png")
    class_distribution_plot_file_name = "../results/smote_oversampling_k3/class_distribution/%s-%s.%s" % (
        data.file_name, "naive_bayes", "png")

    # Split dataset and perform classification
    testable = data.as_testable()
    sampled_testable = common_preprocessing.smote_over_sampling(testable, SMOTE_K_NEIGHBOURS)
    bayes_result = common_classifier.naive_bayes_gaussian_classification(sampled_testable)

    # Print plots and data to files
    print("%s;%s;%s;%s;%s" % (
        bayes_result.file_name, bayes_result.accuracy, bayes_result.precision, bayes_result.recall, bayes_result.f1),
Ejemplo n.º 2
0
import random

from src.classification import CommonClassification
from src.preprocessing import CommonPreprocessing
from src.read_data import DataReader

# Exploratory script: pick one dataset at random from DATA_DIR, print and plot
# information about its testable form, then run
# naive_bayes_gaussian_classification on it and print/plot the result.
DATA_DIR = "../data"
data_reader = DataReader(DATA_DIR)
common_classifier = CommonClassification()
# NOTE(review): not used anywhere below; kept because it is not visible from
# this script whether the constructor has side effects that matter.
common_preprocessing = CommonPreprocessing()

all_keel_data = data_reader.read_keel_dat_directory()
# random.choice() fails on an empty sequence with a bare
# "IndexError: Cannot choose from an empty sequence", which says nothing about
# the data directory. Stop early with an actionable message instead.
# len() is used rather than truthiness so that array-like return values are
# handled exactly the way random.choice() itself would handle them.
if len(all_keel_data) == 0:
    raise SystemExit("No KEEL datasets found in %r" % DATA_DIR)
keel_data = random.choice(all_keel_data)
# To inspect one specific dataset instead of a random one, it can presumably
# be loaded by name, e.g.
# data_reader.read_keel_dat_file("winequality-white-3_vs_7.dat") -- confirm
# against DataReader that it returns the same kind of object.

# Convert the chosen dataset to its testable form and show what it contains.
random_testable_keel_data = keel_data.as_testable()
print("Testable:")
random_testable_keel_data.print_info()
random_testable_keel_data.plot_train_class_distribution()

# Classify the testable dataset, then report the result as text and as a
# confusion-matrix plot.
classification_result = common_classifier.naive_bayes_gaussian_classification(
    random_testable_keel_data)
classification_result.print_info()
classification_result.plot_confusion_matrix()