def test_id3_gain_criteria(defs, db, chi_table, CI="0%"):
    """Build an ID3 tree using the information-gain criteria and print it.

    Args:
        defs: definitions object providing ``attr_set`` (ShroomDefs-style).
        db: training database (ShroomDatabase-style).
        chi_table: chi-square table used for pruning.
        CI: confidence-interval key into ``chi_table`` (default "0%").
    """
    print("\n==== Testing id3 using gain criteria =====\n")
    gain_criteria = InformationGainCriteria()
    # Bug fix: the original ignored the ``db``/``defs`` parameters and
    # used the module-level globals ``mydb``/``mydefs`` instead.
    tree = id3(gain_criteria, db, 'class', defs.attr_set,
               defs, chi_table, CI)
    tree.print_entire_tree()
# Beispiel #2 (scraped example separator — commented out so the file parses cleanly)
# 0
def Run_over_dataset(file_path):
	"""Run each enabled algorithm five times on the dataset at *file_path*.

	Each trial reloads the data, then evaluates every algorithm twice:
	once as loaded and once with the train/test splits exchanged.
	"""
	for trial in range(5):
		original = Load_dataset(file_path)
		# Mirrored copy with train and test splits swapped.
		swapped = deepcopy(original)
		swapped.train_dataset, swapped.test_dataset = (
			original.test_dataset, original.train_dataset)
		k_nearest_neighbor(original)
		k_nearest_neighbor(swapped)
		# naive_bayes(original)
		# naive_bayes(swapped)
		# tan(original)
		# tan(swapped)
		id3(original)
		id3(swapped)
		print()
def test_classify(testfilename, chi_table):
    """Classify the first record of *testfilename* with two ID3 trees.

    Trains one tree with information gain and one with classification
    error (both on the module-level ``mydb``/``mydefs``), then prints
    each tree's classification of the record.
    """
    print("\n\n==== Test classification====")
    sample = ShroomDatabase([], testfilename).records[0]

    # Tree grown with the information-gain selection criteria.
    gain_tree = id3(InformationGainCriteria(), mydb, 'class',
                    mydefs.attr_set, mydefs, chi_table)
    by_gain = gain_tree.classify(sample)

    # Tree grown with the classification-error selection criteria.
    misclass_tree = id3(ClassificationErrorCriteria(), mydb,
                        'class', mydefs.attr_set, mydefs, chi_table)
    by_misclass = misclass_tree.classify(sample)

    print("\nClassification under gain and misclassification:")
    print("record to classify: " + sample.get_raw_string())
    print("(gain) classification: " + by_gain)
    print("(misclass) classification: " + by_misclass)
 def generate_id3_tree(self, selection_criteria,
                       datadef_filename, training_filename,
                       chi_table, CI="0%"):
     """Build and return an ID3 classification tree.

     Loads the attribute definitions and training records from the
     given files, then grows a tree targeting the 'class' attribute
     with the supplied selection criteria and chi-square pruning table.
     """
     defs = ShroomDefs(datadef_filename)
     db = ShroomDatabase([], training_filename)
     return id3(selection_criteria, db, 'class',
                defs.attr_set, defs, chi_table, CI)
def test_id3_misclass_criteria(defs, db, chi_table, CI="0%"):
    """Build an ID3 tree using the classification-error criteria and print it.

    Args:
        defs: definitions object providing ``attr_set`` (ShroomDefs-style).
        db: training database (ShroomDatabase-style).
        chi_table: chi-square table used for pruning.
        CI: confidence-interval key into ``chi_table`` (default "0%").
    """
    # Typo fixed in the banner ("classifcation" -> "classification").
    print("\n==== Testing id3 using classification-error criteria =====\n")
    misclass_error_criteria = ClassificationErrorCriteria()
    # Bug fix: the original ignored the ``db``/``defs`` parameters and
    # used the module-level globals ``mydb``/``mydefs`` instead.
    tree = id3(misclass_error_criteria, db, 'class',
               defs.attr_set, defs, chi_table, CI)
    tree.print_entire_tree()
# Beispiel #6 (scraped example separator — commented out so the file parses cleanly)
# 0
from results import *

#%% use training set to build decision trees
trees = {}
depths = [1, 2, 4, 8]
t_st = time.time()

for maxDepth in depths:
    print('\nMax Depth:', maxDepth)
    # 200 trees per depth, each trained on a bootstrap sample
    # (10% of the training rows, drawn with replacement).
    forest = {}
    for i in np.arange(200):
        bootstrap = dataTrn.sample(int(0.1 * len(dataTrn)), replace=True)
        # id3(data, maxDepth, init_depth)
        forest[i] = id3(bootstrap.values, maxDepth, 0)
        if np.mod(i, 10) == 0:
            print('.', end=" ")
    trees[maxDepth] = forest

t_en = time.time()
t_RunTrees = np.round((t_en - t_st) / 60, 3)
print('\nRuntime (m):', t_RunTrees)

#% transform the data through the tree ensemble
print('\n\nEnsemble Training Data')
dataTrfm_trn = transformData(dataTrn, trees, depths)
print('\n\nEnsemble Testing Data')
dataTrfm_tst = transformData(dataTst, trees, depths)

print('\nEnsemble Cross-Validation Data')
# Beispiel #7 (scraped example separator — commented out so the file parses cleanly)
# 0
from pre_processing import *
from id3 import *
import random

# NOTE(review): ``input`` shadows the builtin of the same name; left
# unchanged because later code may rely on this module-level binding.
input = pre_process_ratings()

# Train on every 1000th rating; evaluate on the full set.
data = input[::1000]
test = input

tree = id3(data)

#questions = ["Genero", "Idade", "Ocupação", "Categoria"]
#printtree(tree, questions)
"""
num_pos = 0
num_neg = 0
for t in test:
    b = t[-1]
    if b == 1:
        num_pos += 1
    else:
        num_neg += 1
"""
# Confusion-matrix counters for the three classifiers under comparison.
correct = 0
fp_id3 = fn_id3 = tp_id3 = tn_id3 = 0
fp_priori = fn_priori = tp_priori = tn_priori = 0
fp_random = fn_random = tp_random = tn_random = 0

for t in test:
    # Decision Tree
    a = predict(t[:-1], tree)