from src.classification import CommonClassification from src.preprocessing import CommonPreprocessing from src.read_data import DataReader SMOTE_K_NEIGHBOURS = 3 DATA_DIR = "../data" data_reader = DataReader(DATA_DIR) common_classifier = CommonClassification() common_preprocessing = CommonPreprocessing() keel_data = data_reader.read_keel_dat_directory() for data in keel_data: # initialize file names and params print("Current dataset: " + str(data.file_name)) csv_export_file_name = "../results/smote_oversampling_k3/data-naive_bayes.csv" output_csv = open(csv_export_file_name, "a") confusion_matrix_plot_file_name = "../results/smote_oversampling_k3/confusion_matrix/%s-%s.%s" % ( data.file_name, "naive_bayes", "png") class_distribution_plot_file_name = "../results/smote_oversampling_k3/class_distribution/%s-%s.%s" % ( data.file_name, "naive_bayes", "png") # Split dataset and perform classification testable = data.as_testable() sampled_testable = common_preprocessing.smote_over_sampling(testable, SMOTE_K_NEIGHBOURS) bayes_result = common_classifier.naive_bayes_gaussian_classification(sampled_testable) # Print plots and data to files print("%s;%s;%s;%s;%s" % ( bayes_result.file_name, bayes_result.accuracy, bayes_result.precision, bayes_result.recall, bayes_result.f1),
# Demo script: pick one dataset at random from those returned by
# DataReader.read_keel_dat_directory(), print its info, plot its training
# class distribution, run Gaussian naive Bayes on it, then print the
# classification result and plot its confusion matrix.
import random

from src.classification import CommonClassification
from src.preprocessing import CommonPreprocessing
from src.read_data import DataReader

DATA_DIR = "../data"

data_reader = DataReader(DATA_DIR)
common_classifier = CommonClassification()
# Not used further down in this script; kept so the module-level name (and
# whatever its constructor does) stays exactly as before.
common_preprocessing = CommonPreprocessing()

all_keel_data = data_reader.read_keel_dat_directory()
if len(all_keel_data) == 0:
    # random.choice() raises a bare IndexError on an empty sequence, which
    # gives no hint that the real problem is that nothing was loaded.
    raise SystemExit("No datasets were loaded by DataReader(%r)" % DATA_DIR)

keel_data = random.choice(all_keel_data)
# To run on one fixed dataset instead of a random one, replace the line above
# with e.g.:
#   keel_data = data_reader.read_keel_dat_file("winequality-white-3_vs_7.dat")

random_testable_keel_data = keel_data.as_testable()

print("Testable:")
random_testable_keel_data.print_info()
random_testable_keel_data.plot_train_class_distribution()

classification_result = common_classifier.naive_bayes_gaussian_classification(
    random_testable_keel_data)
classification_result.print_info()
classification_result.plot_confusion_matrix()