def main(data_path='train.data.csv', scheme_path='wine.names.csv'):
    """Train a CBA classifier on one shuffled dataset and print diagnostics.

    Runs CBA-RG (rule generation), prunes the resulting CARs, builds a
    CBA-CB M1 classifier, and prints the rules, pruned rules, classifier,
    and total rule count.

    Args:
        data_path: path to the CSV data file (default: 'train.data.csv').
        scheme_path: path to the CSV scheme/names file (default: 'wine.names.csv').
    """
    data, attributes, value_type = read(data_path, scheme_path)
    random.shuffle(data)
    train_dataset = pre_process(data, attributes, value_type)

    # CBA-RG: generate class association rules with minsup=0.22, minconf=0.6.
    cars = rule_generator(train_dataset, 0.22, 0.6)
    cars.prune_rules(train_dataset)
    cars.rules = cars.pruned_rules

    # CBA-CB M1: build the classifier from the pruned CARs.
    classifier_m1 = classifier_builder_m1(cars, train_dataset)
    total_car_number = len(cars.rules)

    print("_______________________________________________________")
    cars.print_rule()
    print("_______________________________________________________")
    # NOTE(review): re-pruning the already-pruned rule set; kept so that
    # print_pruned_rule() reflects the current rules, but likely redundant.
    cars.prune_rules(train_dataset)
    cars.print_pruned_rule()
    print("_______________________________________________________")
    print()
    classifier_m1.print()
    print("_______________________________________________________")
    print(total_car_number)
def cross_validate_m1_without_prune(data_path, scheme_path, minsup=0.01, minconf=0.5, n_folds=10):
    """K-fold cross-validation of CBA-CB M1 without rule pruning.

    Splits the shuffled dataset into ``n_folds`` contiguous blocks; each round
    uses one block as the test set and the rest for training. Prints per-round
    and average error rate, CAR counts, rule counts, and runtimes.

    Args:
        data_path: path to the CSV data file.
        scheme_path: path to the CSV scheme/names file.
        minsup: minimum support for CBA-RG (default 0.01).
        minconf: minimum confidence for CBA-RG (default 0.5).
        n_folds: number of cross-validation folds (default 10).
    """
    data, attributes, value_type = read(data_path, scheme_path)
    random.shuffle(data)
    dataset = pre_process(data, attributes, value_type)

    # Fold boundaries: n_folds equal blocks; the last split point absorbs
    # the remainder when len(dataset) is not divisible by n_folds.
    block_size = int(len(dataset) / n_folds)
    split_point = [k * block_size for k in range(n_folds)]
    split_point.append(len(dataset))

    cba_rg_total_runtime = 0
    cba_cb_total_runtime = 0
    total_car_number = 0
    total_classifier_rule_num = 0
    error_total_rate = 0

    for k in range(len(split_point) - 1):
        print("\nRound %d:" % k)

        training_dataset = dataset[:split_point[k]] + dataset[split_point[k + 1]:]
        test_dataset = dataset[split_point[k]:split_point[k + 1]]

        # perf_counter is the recommended clock for measuring intervals.
        start_time = time.perf_counter()
        cars = rule_generator(training_dataset, minsup, minconf)
        cba_rg_runtime = time.perf_counter() - start_time
        cba_rg_total_runtime += cba_rg_runtime

        start_time = time.perf_counter()
        classifier_m1 = classifier_builder_m1(cars, training_dataset)
        cba_cb_runtime = time.perf_counter() - start_time
        cba_cb_total_runtime += cba_cb_runtime

        error_rate = get_error_rate(classifier_m1, test_dataset)
        error_total_rate += error_rate

        total_car_number += len(cars.rules)
        total_classifier_rule_num += len(classifier_m1.rule_list)

        print("CBA's error rate without pruning: %.1lf%%" % (error_rate * 100))
        print("No. of CARs without pruning: %d" % len(cars.rules))
        print("CBA-RG's run time without pruning: %.2lf s" % cba_rg_runtime)
        print("CBA-CB M1's run time without pruning: %.2lf s" % cba_cb_runtime)
        print("No. of rules in classifier of CBA-CB M1 without pruning: %d" % len(classifier_m1.rule_list))

    print("\nAverage CBA's error rate without pruning: %.1lf%%" % (error_total_rate / n_folds * 100))
    print("Average No. of CARs without pruning: %d" % int(total_car_number / n_folds))
    print("Average CBA-RG's run time without pruning: %.2lf s" % (cba_rg_total_runtime / n_folds))
    print("Average CBA-CB M1's run time without pruning: %.2lf s" % (cba_cb_total_runtime / n_folds))
    print("Average No. of rules in classifier of CBA-CB M1 without pruning: %d" % int(total_classifier_rule_num / n_folds))
def cross_validate(data_path, scheme_path, class_first=False, minsup=0.1, minconf=0.6, n_folds=10):
    """K-fold cross-validation of the CBA classifier, reporting accuracy.

    Splits the shuffled dataset into ``n_folds`` contiguous blocks; each round
    trains on all but one block and tests on the held-out block. Prints the
    classifier, per-round accuracy/rule counts/runtimes, and the averages.

    Args:
        data_path: path to the CSV data file.
        scheme_path: path to the CSV scheme/names file.
        class_first: if True, the class label is the FIRST column of the data
            and is rotated to the last position (which the rest of the
            pipeline expects).
        minsup: minimum support for CBA-RG (default 0.1).
        minconf: minimum confidence for CBA-RG (default 0.6).
        n_folds: number of cross-validation folds (default 10).
    """
    data, attributes, value_type = read(data_path, scheme_path)

    if class_first:
        # Move the class column (and its attribute/type entries) from the
        # front to the back of every record.
        for i in range(len(data)):
            a = data[i].pop(0)
            data[i].append(a)
        a = attributes.pop(0)
        attributes.append(a)
        b = value_type.pop(0)
        value_type.append(b)

    random.shuffle(data)
    dataset = pre_process(data, attributes, value_type)

    # Fold boundaries: n_folds equal blocks; the final boundary absorbs the
    # remainder when len(dataset) is not divisible by n_folds.
    block_size = int(len(dataset) / n_folds)
    split_point = [k * block_size for k in range(n_folds)]
    split_point.append(len(dataset))

    cba_rg_total_runtime = 0
    cba_cb_total_runtime = 0
    total_car_number = 0
    total_classifier_rule_num = 0
    error_total_rate = 0
    acc_total = 0

    for k in range(len(split_point) - 1):
        print("\nRound %d:" % k)

        training_dataset = dataset[:split_point[k]] + dataset[split_point[k + 1]:]
        test_dataset = dataset[split_point[k]:split_point[k + 1]]

        # perf_counter is the recommended clock for measuring intervals.
        start_time = time.perf_counter()
        cars = rule_generator(training_dataset, minsup, minconf)
        cba_rg_runtime = time.perf_counter() - start_time
        cba_rg_total_runtime += cba_rg_runtime

        start_time = time.perf_counter()
        classifier = classifier_builder_m1(cars, training_dataset)
        cba_cb_runtime = time.perf_counter() - start_time
        cba_cb_total_runtime += cba_cb_runtime

        classifier.print()

        res = acc(classifier, test_dataset)
        acc_total += res

        error_rate = get_error_rate(classifier, test_dataset)
        error_total_rate += error_rate

        total_car_number += len(cars.rules)
        total_classifier_rule_num += len(classifier.rule_list)

        print("accuracy:", (res * 100))
        print("No. of CARs : ", len(cars.rules))
        print("CBA-RG's run time : s", cba_rg_runtime)
        print("CBA-CB M1's run time : s", cba_cb_runtime)
        print("No. of rules in classifier of CBA-CB: ", len(classifier.rule_list))

    print("\n Average CBA's accuracy :", (acc_total / n_folds * 100))
    print("Average No. of CARs : ", (total_car_number / n_folds))
    print("Average CBA-RG's run time: ", (cba_rg_total_runtime / n_folds))
    print("Average CBA-CB run time: ", (cba_cb_total_runtime / n_folds))
    print("Average No. of rules in classifier of CBA-CB: ", (total_classifier_rule_num / n_folds))