def test_id3_gain_criteria(defs, db, chi_table, CI="0%"):
    """Build an ID3 tree using the information-gain criterion and print it.

    Args:
        defs: definitions object exposing ``attr_set`` (e.g. a ShroomDefs).
        db: training database of records.
        chi_table: chi-squared lookup table used for pruning.
        CI: confidence interval for chi-squared pruning (default "0%").
    """
    print("\n==== Testing id3 using gain criteria =====\n")
    gain_criteria = InformationGainCriteria()
    # BUG FIX: the original ignored the `defs` and `db` parameters and read
    # the module globals `mydefs`/`mydb` instead, so callers could not choose
    # the dataset. Use the arguments as the signature promises.
    tree = id3(gain_criteria, db, 'class', defs.attr_set, defs, chi_table, CI)
    tree.print_entire_tree()
def Run_over_dataset(file_path):
    """Run each enabled algorithm five times over the dataset at *file_path*.

    Each round loads a fresh train/test split, then also evaluates the
    mirrored split (train and test swapped) so both halves serve as test data.
    """
    for _ in range(5):
        original = Load_dataset(file_path)
        # Mirror the split: swap the train and test partitions.
        mirrored = deepcopy(original)
        mirrored.train_dataset = original.test_dataset
        mirrored.test_dataset = original.train_dataset

        k_nearest_neighbor(original)
        k_nearest_neighbor(mirrored)
        # naive_bayes and tan runs are currently disabled.
        id3(original)
        id3(mirrored)
        print()
def test_classify(testfilename, chi_table):
    """Classify the first record of *testfilename* under both split criteria.

    Builds one tree with information gain and one with classification error
    (both trained on the module-level ``mydb``/``mydefs``), classifies the
    same record with each, and prints the two labels for comparison.
    """
    print("\n\n==== Test classification====")
    testdb = ShroomDatabase([], testfilename)
    record = testdb.records[0]

    gain_tree = id3(InformationGainCriteria(), mydb, 'class',
                    mydefs.attr_set, mydefs, chi_table)
    gain_label = gain_tree.classify(record)

    misclass_tree = id3(ClassificationErrorCriteria(), mydb, 'class',
                        mydefs.attr_set, mydefs, chi_table)
    misclass_label = misclass_tree.classify(record)

    print("\nClassification under gain and misclassification:")
    print("record to classify: " + record.get_raw_string())
    print("(gain) classification: " + gain_label)
    print("(misclass) classification: " + misclass_label)
def generate_id3_tree(self, selection_criteria, datadef_filename, training_filename, chi_table, CI="0%"):
    """Generates an ID3 classification tree.

    Args:
        selection_criteria: attribute-selection criterion object for id3.
        datadef_filename: path to the attribute-definitions file.
        training_filename: path to the training-records file.
        chi_table: chi-squared lookup table used for pruning.
        CI: confidence interval for chi-squared pruning (default "0%").

    Returns:
        The root of the trained ID3 tree.
    """
    defs = ShroomDefs(datadef_filename)
    db = ShroomDatabase([], training_filename)
    return id3(selection_criteria, db, 'class', defs.attr_set, defs, chi_table, CI)
def test_id3_misclass_criteria(defs, db, chi_table, CI="0%"):
    """Build an ID3 tree using the classification-error criterion and print it.

    Args:
        defs: definitions object exposing ``attr_set`` (e.g. a ShroomDefs).
        db: training database of records.
        chi_table: chi-squared lookup table used for pruning.
        CI: confidence interval for chi-squared pruning (default "0%").
    """
    # NOTE: message text kept verbatim (including the original spelling).
    print("\n==== Testing id3 using classifcation-error criteria =====\n")
    misclass_error_criteria = ClassificationErrorCriteria()
    # BUG FIX: the original ignored the `defs` and `db` parameters and read
    # the module globals `mydefs`/`mydb` instead. Use the arguments so the
    # caller controls which dataset is tested.
    tree = id3(misclass_error_criteria, db, 'class', defs.attr_set, defs, chi_table, CI)
    tree.print_entire_tree()
from results import * #%% use training set to build decision trees trees = {} depths = [1, 2, 4, 8] t_st = time.time() for maxDepth in depths: trees[maxDepth] = {} print('\nMax Depth:', maxDepth) for i in np.arange(200): df = dataTrn.sample(int(0.1 * len(dataTrn)), replace=True) # input id3(data, maxDepth, init_depth) trees[maxDepth][i] = id3(df.values, maxDepth, 0) if np.mod(i, 10) == 0: print('.', end=" ") t_en = time.time() t_RunTrees = np.round((t_en - t_st) / 60, 3) print('\nRuntime (m):', t_RunTrees) #% transformer data print('\n\nEnsemble Training Data') dataTrfm_trn = transformData(dataTrn, trees, depths) print('\n\nEnsemble Testing Data') dataTrfm_tst = transformData(dataTst, trees, depths) print('\nEnsemble Cross-Validation Data')
from pre_processing import * from id3 import * import random input = pre_process_ratings() data = input[::1000] test = input tree = id3(data) #questions = ["Genero", "Idade", "Ocupação", "Categoria"] #printtree(tree, questions) """ num_pos = 0 num_neg = 0 for t in test: b = t[-1] if b == 1: num_pos += 1 else: num_neg += 1 """ correct = 0 fp_id3, fn_id3, tp_id3, tn_id3 = 0, 0, 0, 0 fp_priori, fn_priori, tp_priori, tn_priori = 0, 0, 0, 0 fp_random, fn_random, tp_random, tn_random = 0, 0, 0, 0 for t in test: # Decision Tree a = predict(t[:-1], tree)