Example #1
def main():
    #Create our main trees
    tree1 = dtree.buildTree(m.monk1, m.attributes)
    tree2 = dtree.buildTree(m.monk2, m.attributes)
    tree3 = dtree.buildTree(m.monk3, m.attributes)

    #PLOT MONK1 - MEAN AND VARIANCE
    dataset = m.monk1
    testdata = m.monk1test

    #Overall error on test set
    benchmarkTreeMonk1 = dtree.buildTree(dataset, m.attributes)
    #print("BENCHMARK: ", 1-dtree.check(benchmarkTreeMonk1, testdata))

    plotMonk(dataset, testdata,
             "Mean error vs. Fraction - MONK1\n500 runs in each batch", True)
    plotMonk(dataset, testdata,
             "Variance vs. Fraction - MONK1\n500 runs in each batch", False)

    #PLOT MONK3 - MEAN AND VARIANCE
    dataset = m.monk3
    testdata = m.monk3test

    #Overall error on test set
    benchmarkTreeMonk3 = dtree.buildTree(dataset, m.attributes)
    #print("BENCHMARK: ", 1-dtree.check(benchmarkTreeMonk3, testdata))

    plotMonk(dataset, testdata,
             "Mean error vs. Fraction - MONK3\n500 runs in each batch", True)
    plotMonk(dataset, testdata,
             "Variance vs. Fraction - MONK3\n500 runs in each batch", False)
    plt.show()
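plotMonk is used above but not shown. Below is a minimal sketch of what such a helper could look like, assuming the shuffle-and-split partition helper and the prune-while-it-does-not-hurt loop seen in the other examples; the signature, the 500-run default and the helper names are assumptions, not the original code.

import random
import matplotlib.pyplot as plt
import dtree
import monkdata as m

def partition(data, fraction):
    # Assumed helper: shuffle and split into (training, validation) parts.
    ldata = list(data)
    random.shuffle(ldata)
    breakPoint = int(len(ldata) * fraction)
    return ldata[:breakPoint], ldata[breakPoint:]

def pruneOnValidation(tree, validationSet):
    # Assumed helper: keep pruning while validation performance does not drop.
    improved = True
    while improved:
        improved = False
        for candidate in dtree.allPruned(tree):
            if dtree.check(candidate, validationSet) >= dtree.check(tree, validationSet):
                tree = candidate
                improved = True
    return tree

def plotMonk(dataset, testdata, title, plotMean,
             fractions=(0.3, 0.4, 0.5, 0.6, 0.7, 0.8), runs=500):
    # For each fraction, prune `runs` trees and plot either the mean test
    # error (plotMean=True) or its variance (plotMean=False).
    ys = []
    for fraction in fractions:
        errors = []
        for _ in range(runs):
            train, val = partition(dataset, fraction)
            pruned = pruneOnValidation(dtree.buildTree(train, m.attributes), val)
            errors.append(1 - dtree.check(pruned, testdata))
        mean = sum(errors) / runs
        ys.append(mean if plotMean else
                  sum((e - mean) ** 2 for e in errors) / runs)
    plt.figure()
    plt.plot(fractions, ys, marker='o')
    plt.title(title)
    plt.xlabel("Fraction")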
Example #2
def calculate_best(Td,Vd):

    error = -sys.maxsize
    current_tree = tree.buildTree(Td,m.attributes)
    tr = tree.buildTree(Td,m.attributes)

    while True:
        counter = 0
        # Recompute the pruning candidates from the current best tree on every
        # pass; otherwise only one level of pruning is ever evaluated.
        tr_pruned = tree.allPruned(tr)
        count = len(tr_pruned)
        
        for x in tr_pruned:
            if tree.check(x,Vd) > error:
                error = tree.check(x,Vd)
                current_tree = x
                #print("current tree")
                #print(current_tree)
                #print("error")
                #print(error)
            else:
                counter = counter + 1
        
        if count == counter:
            break
            
        tr = current_tree
    
   # print("Selected tree:")
    #print(tr)
    #print("error:")
    #print(error)
    return error, tr
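One way calculate_best might be called, reusing the partition helper sketched under Example #1 and the same tree/m aliases as this example (illustrative usage only):

# Hypothetical usage: split monk1, prune on the validation part,
# then report the pruned tree's error on the held-out test set.
train, val = partition(m.monk1, 0.6)
best_score, best_tree = calculate_best(train, val)
print("validation accuracy:", best_score)
print("test error:", 1 - tree.check(best_tree, m.monk1test))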
Example #3
def assignment5_id3():
    t1 = d.buildTree(m.monk1, m.attributes)
    #qt.drawTree(t1)
    print(1 - d.check(t1, m.monk1test))
    t2 = d.buildTree(m.monk2, m.attributes)
    print(1 - d.check(t2, m.monk2test))
    #qt.drawTree(t2)
    t3 = d.buildTree(m.monk3, m.attributes)
    print(1 - d.check(t3, m.monk3test))
Example #4
def build_tree():
  print "\n------------------------------\nAssignment 3 - Error\n------------------------------"
  tree = dt.buildTree(data.monk1, data.attributes)
#drawtree.drawTree(tree)
  print "Dataset\tE(train)\tE(test)"
  print "Monk1:\t%.6f\t%.6f" % (1-dt.check(tree, data.monk1), 1-dt.check(tree, data.monk1test))
  tree = dt.buildTree(data.monk2, data.attributes)
  print "Monk2:\t%.6f\t%.6f" % (1-dt.check(tree, data.monk2), 1-dt.check(tree, data.monk2test))
  tree = dt.buildTree(data.monk3, data.attributes)
  print "Monk3:\t%.6f\t%.6f" % (1-dt.check(tree, data.monk3), 1-dt.check(tree, data.monk3test))
Example #5
def bldTree():
	tree_monk1 = dtree.buildTree(mdata.monk1,mdata.attributes)
	tree_monk2 = dtree.buildTree(mdata.monk2,mdata.attributes)
	tree_monk3 = dtree.buildTree(mdata.monk3,mdata.attributes)
	print('MONK1 Performance on training set',dtree.check(tree_monk1,mdata.monk1))
	print('MONK1 Performance on test set',dtree.check(tree_monk1,mdata.monk1test))
	print('MONK2 Performance on training set',dtree.check(tree_monk2,mdata.monk2))
	print('MONK2 Performance on test set',dtree.check(tree_monk2,mdata.monk2test))
	print('MONK3 Performance on training set',dtree.check(tree_monk3,mdata.monk3))
	print('MONK3 Performance on test set',dtree.check(tree_monk3,mdata.monk3test))
Example #6
def ASSIGNMENT5():
    t1 = dtree.buildTree(m.monk1, m.attributes)
    print(dtree.check(t1, m.monk1test))
    print(dtree.check(t1, m.monk1))

    t2 = dtree.buildTree(m.monk2, m.attributes)
    print(dtree.check(t2, m.monk2test))
    print(dtree.check(t2, m.monk2))

    t3 = dtree.buildTree(m.monk3, m.attributes)
    print(dtree.check(t3, m.monk3test))
    print(dtree.check(t3, m.monk3))
Example #7
def build_and_check_trees():
    tree_m1 = d.buildTree(m.monk1, m.attributes)
    tree_m2 = d.buildTree(m.monk2, m.attributes)
    tree_m3 = d.buildTree(m.monk3, m.attributes)

    print(1 - d.check(tree_m1, m.monk1))
    print(1 - d.check(tree_m2, m.monk2))
    print(1 - d.check(tree_m3, m.monk3))

    print(1 - d.check(tree_m1, m.monk1test))
    print(1 - d.check(tree_m2, m.monk2test))
    print(1 - d.check(tree_m3, m.monk3test))
Example #8
def ASSIGNMENT_5():
    print(" ")
    print("ASSIGNMENT(5)")
    print("ERROR:")
    t = buildTree(m.monk1, m.attributes)
    print("MONK-1      %f      %f" %
          (1 - check(t, m.monk1), 1 - check(t, m.monk1test)))
    t = buildTree(m.monk2, m.attributes)
    print("MONK-2      %f      %f" %
          (1 - check(t, m.monk2), 1 - check(t, m.monk2test)))
    t = buildTree(m.monk3, m.attributes)
    print("MONK-3      %f      %f" %
          (1 - check(t, m.monk3), 1 - check(t, m.monk3test)))
Example #9
def a5():
    t1 = d.buildTree(m.monk1, m.attributes)
    #pyqt.drawTree(t1)
    print("Accuracy monk 1 train" + str(d.check(t1, m.monk1)))
    print("Accuracy monk 1 test" + str(d.check(t1, m.monk1test)))
    t2 = d.buildTree(m.monk2, m.attributes)
    #pyqt.drawTree(t2)
    print("Accuracy monk 2 train" + str(d.check(t2, m.monk2)))
    print("Accuracy monk 2 test" + str(d.check(t2, m.monk2test)))
    t3 = d.buildTree(m.monk3, m.attributes)
    #pyqt.drawTree(t3)
    print("Accuracy monk 3 train" + str(d.check(t3, m.monk3)))
    print("Accuracy monk 3 test" + str(d.check(t3, m.monk3test)))
Example #10
def optimisePartitions1():  #runs
    tree1 = d.buildTree(m.monk1, m.attributes)
    score1 = d.check(tree1, m.monk1test)
    print("Performance of monk1 tree: " + str(score1) + "\n")
    for index, partition in enumerate(partitions):
        for j in range(runs):
            train1, val3 = d.partition(m.monk1, partition)
            tree1a = d.buildTree(train1, m.attributes)
            best1 = bestPrunedTree(tree1a, val3)
            bigList1.append(1 - d.check(best1, m.monk1test))
        errorList1.append(sum(bigList1) / len(bigList1))
        varianceList1.append(variance(bigList1, errorList1[index]))

    return errorList1, varianceList1
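optimisePartitions1 and optimisePartitions3 lean on module-level names that are not shown: partitions, runs, the accumulator lists, a variance helper, and bestPrunedTree, which is presumably the same prune-while-it-does-not-hurt loop sketched under Example #1. A hedged sketch of that surrounding setup, with values inferred from how the names are used:

# Assumed module-level setup for optimisePartitions1/optimisePartitions3;
# the names and values below are inferred from usage, not taken from the
# original file.
partitions = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
runs = 100

bigList1, errorList1, varianceList1 = [], [], []
bigList3, errorList3, varianceList3 = [], [], []

def variance(values, mean):
    # Population variance around a precomputed mean.
    return sum((v - mean) ** 2 for v in values) / len(values)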
Example #11
def optimisePartitions3():  #runs
    tree3 = d.buildTree(m.monk3, m.attributes)
    score3 = d.check(tree3, m.monk3test)
    print("Performance of monk3 tree: " + str(score3) + "\n")
    for index, partition in enumerate(partitions):
        for j in range(runs):
            train3, val3 = d.partition(m.monk3, partition)
            tree3a = d.buildTree(train3, m.attributes)
            best3 = bestPrunedTree(tree3a, val3)
            bigList3.append(1 - d.check(best3, m.monk3test))
        errorList3.append(sum(bigList3) / len(bigList3))
        varianceList3.append(variance(bigList3, errorList3[index]))

    return errorList3, varianceList3
Example #12
def A3():
  t1 = dT.buildTree( m.monk1, m.attributes )
  print( dT.check( t1, m.monk1test ) )
  print( dT.check( t1, m.monk1 ) )
  print '\n'
  #draw.drawTree( t1 )

  t2 = dT.buildTree( m.monk2, m.attributes )
  print( dT.check( t2, m.monk2test ) )
  print '\n'
  #draw.drawTree( t2 )

  t3 = dT.buildTree( m.monk3, m.attributes )
  print( dT.check( t3, m.monk3test ) )
  print '\n'
Example #13
def A3():
    t1 = dT.buildTree(m.monk1, m.attributes)
    print(dT.check(t1, m.monk1test))
    print(dT.check(t1, m.monk1))
    print '\n'
    #draw.drawTree( t1 )

    t2 = dT.buildTree(m.monk2, m.attributes)
    print(dT.check(t2, m.monk2test))
    print '\n'
    #draw.drawTree( t2 )

    t3 = dT.buildTree(m.monk3, m.attributes)
    print(dT.check(t3, m.monk3test))
    print '\n'
Example #14
def check_pruning(data_set):
    s_dict = dict()
    t_temp = d.buildTree(data_set.Train, m.attributes)
    prun_set = d.allPruned(t_temp)
    for temp in prun_set:
        s_dict[temp] = (d.check(temp, data_set.Test))
    return key_with_maxval(s_dict)
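key_with_maxval is not included in the snippet; it presumably just returns the dictionary key with the largest value, i.e. the pruned tree that scored best. A minimal sketch under that assumption:

def key_with_maxval(score_dict):
    # Assumed helper: return the key whose value is largest
    # (here: the pruned tree with the best score).
    return max(score_dict, key=score_dict.get)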
Example #15
def best_partition(full_dataset):
    #Set local variables
    tmp_max_perf = 0
    max_partition = None
    plot_y = []

    for i in range(6):  #[0,1,2,3,4,5]
        i = (float(i) + 3) / 10  #[0.3,0.4,0.5,0.6,0.7,0.8]

        monk_train, monk_val = partition(full_dataset, i)

        #Get the best pruning for that partition
        max_prune, pruned_tree = prune(buildTree(monk_train, m.attributes),
                                       monk_val)

        #Compute performance for pruned_tree on the test set
        #(test_set and k are assumed to be defined at module level, outside this snippet)
        max_prune = check(pruned_tree, test_set[k])

        #print("\t NEW(%f), OLD(%f)" % (max_prune, tmp_max_perf))

        #Store the results in a list
        plot_y.append(1 - max_prune)

        #Compare perf with the best one
        if max_prune > tmp_max_perf:
            tmp_max_perf = max_prune
            max_partition = i

    return max_partition, tmp_max_perf, plot_y
Example #16
def findBestPrunedTree(originalTrainSet, fraction):
    """ Find the best pruned tree, given a training set and a fraction for partitioning. """
    trainSet, validationSet = partition(originalTrainSet.dataset, fraction)
    tree = d.buildTree(trainSet, m.attributes)

    bestTreeSoFar = tree
    bestPerformanceSoFar = d.check(tree, validationSet)
    print("Pruning " + originalTrainSet.name + " with fraction = " +
          str(fraction) + " and performance on new validation set = " +
          str(bestPerformanceSoFar))

    while (True):
        possibleWaysToPruneTree = d.allPruned(bestTreeSoFar)

        if (len(possibleWaysToPruneTree) == 0):
            print("No more ways to prune tree. Returning.")
            return bestTreeSoFar, bestPerformanceSoFar

        bestPrunedTree, performance = getBestPerformingTree(
            possibleWaysToPruneTree, validationSet)

        if (performance >= bestPerformanceSoFar):
            print("Found pruned tree which performed better: " +
                  str(performance))
            bestTreeSoFar = bestPrunedTree
            bestPerformanceSoFar = performance
        else:
            print("All pruned trees perform worse. Stopping here.")
            return bestTreeSoFar, bestPerformanceSoFar
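getBestPerformingTree is referenced above but not shown; a minimal sketch under the assumption that it simply evaluates every pruned candidate on the validation set and returns the best one together with its score:

def getBestPerformingTree(candidateTrees, validationSet):
    # Assumed helper: pick the candidate with the highest validation score.
    bestTree = candidateTrees[0]
    bestPerformance = d.check(bestTree, validationSet)
    for candidate in candidateTrees[1:]:
        performance = d.check(candidate, validationSet)
        if performance > bestPerformance:
            bestTree = candidate
            bestPerformance = performance
    return bestTree, bestPerformance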
Example #17
def pruningTest(dataset, fraction):  # returns the error classification ratio
    monktrain, monkval = partition(dataset, fraction)
    tree = dtree.buildTree(monktrain, m.attributes)
    curRatio = dtree.check(tree, monkval)
    maxR = prune(curRatio, tree, monkval)
    #print("Max is: {:f}".format(maxR))
    return 1 - maxR
Example #18
def test_error_per_partition(monk, monktest):
    fraction_values = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
    test_error_mean = []
    test_error_std = []
    for partition_number in fraction_values:
        print(partition_number)
        testErrors_list =[]
        n_iters = 600
        for iter in range(n_iters):
            monktrain, monkval = partition(monk, partition_number)
            tree = d.buildTree(monktrain, m.attributes)
            prunedTree = getPrunedTree(tree, monkval)
            testError = 1 - d.check(prunedTree, monktest)
            testErrors_list.append(testError)
        print("all iters calculated")
        testErrors_np = np.array(testErrors_list)
        test_error_mean.append(testErrors_np.mean())
        test_error_std.append(testErrors_np.std())


    plt.scatter(fraction_values, test_error_mean, c=test_error_std)
    cbar = plt.colorbar()
    cbar.set_label('Standard deviation', rotation=270, labelpad=30)
    plt.xlabel("fraction parameter")
    plt.ylabel("Average classification error (test set)")
    plt.show()
Example #19
def calcNextTreeLevel():
    selectedAttribute = m.attributes[4]
    s1 = dtree.select(m.monk1, selectedAttribute, 1)
    s2 = dtree.select(m.monk1, selectedAttribute, 2)
    s3 = dtree.select(m.monk1, selectedAttribute, 3)
    s4 = dtree.select(m.monk1, selectedAttribute, 4)

    # Calculate information gain of subsets
    #ASSIGNMENT3(s1)
    #ASSIGNMENT3(s2)
    #ASSIGNMENT3(s3)
    #ASSIGNMENT3(s4)

    mc1 = dtree.mostCommon(s1)
    mc2 = dtree.mostCommon(s2)
    mc3 = dtree.mostCommon(s3)
    mc4 = dtree.mostCommon(s4)
    #print(mc1)
    #print(mc2)
    #print(mc3)
    #print(mc4)

    tree = dtree.buildTree(m.monk2test, m.attributes)
    print(tree)
    draw.drawTree(tree)
Example #20
def PRINT_TREE_AT_LEVEL_2():
    # A5
    print(" ")
    print("LEVEL 1:")
    print(m.attributes[4])
    Att = [None] * 4
    for value in range(1, 5):
        Att[value - 1] = select(m.monk1, m.attributes[4], value)

    print("LEVEL 2:")
    for A in Att:
        tmp = bestAttribute(A, m.attributes)
        print(tmp)
        if tmp == m.attributes[0]:
            for value in range(1, 4):
                print(mostCommon(select(A, tmp, value)))
        if tmp == m.attributes[1]:
            for value in range(1, 4):
                print(mostCommon(select(A, tmp, value)))
        if tmp == m.attributes[2]:
            for value in range(1, 3):
                print(mostCommon(select(A, tmp, value)))
        if tmp == m.attributes[3]:
            for value in range(1, 4):
                print(mostCommon(select(A, tmp, value)))
        if tmp == m.attributes[4]:
            for value in range(1, 5):
                print(mostCommon(select(A, tmp, value)))
        if tmp == m.attributes[5]:
            for value in range(1, 3):
                print(mostCommon(select(A, tmp, value)))
    print(" ")
    t = buildTree(m.monk1, m.attributes)
    drawTree(t)
Example #21
def getData1(iterations):
    fraction = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
    error = [0] * 6
    for i in range(6):
        error[i] = [0] * iterations
    #print("\nMonk1")
    for f in range(len(fraction)):
        #print("\nFactor: %.1f" % f)
        for i in range(0, iterations):
            monk1train, monk1val = partition(mdata.monk1, fraction[f])
            monk1tree = dtree.buildTree(monk1train, mdata.attributes)
            while True:
                prunelist = dtree.allPruned(monk1tree)
                temptree = monk1tree
                for x in prunelist:
                    if dtree.check(x, monk1val) >= dtree.check(
                            temptree, monk1val):
                        temptree = x

                if temptree == monk1tree:
                    break
                monk1tree = temptree

            error[f][i] = dtree.check(monk1tree, mdata.monk1test)
    return error
Example #22
def assignment4_p3(data, attributes, fraction):
    trainData, validData = partition(data, fraction)
    dataTree = d.buildTree(trainData, attributes)
    orgErr = 1 - d.check(dataTree, validData)
    # print("ORIGINAL ERR", orgErr)
    orgTree = dataTree
    #########################
    bestPrunedTreesList = []
    toPrune = []
    toPrune.append(orgTree)
    # bestPrunedTreesList.append(orgTree)
    err = orgErr
    bestErrorRate = err
    bestPrunedTreesList = getPrunedChildren(toPrune, bestErrorRate, validData)

    if len(bestPrunedTreesList) == 0:
        toReturn = toPrune[0]
    else:
        toReturn = bestPrunedTreesList[0]

    #   print(toReturn)
    # print("No. of best pruned trees:", len(bestPrunedTreesList))
    # for i in range(0, len(bestPrunedTreesList)):
    # print("Pruned Tree No. ", i, "test error rate: ", 1-d.check(bestPrunedTreesList[i], validData))
    #   print("Pruned Tree ", "test error rate: ", 1-d.check(toReturn, validData))

    # return bestPrunedTreesList
    return 1 - d.check(toReturn, validData)
Example #23
def pruneTree(train, validation, acc_desired):

    t = d.buildTree(train, m.attributes)
    accuracy = d.check(t, validation)
    accuracy_p = accuracy
    #print("Starting accuracy:" + str(accuracy))
    temp = t
    tt = 0
    while (tt < acc_desired):
        tt += 1
        temp = t
        tlist = d.allPruned(t)
        accuracy_p = 0
        for i in range(0, len(tlist)):
            #print(i)
            accuracy = d.check(tlist[i], validation)
            #print("Pruned tree no " + str(i) + " accuracy: " + str(accuracy))
            #print(accuracy_p)
            if (accuracy >= accuracy_p):
                accuracy_p = accuracy
                #print("Set new accuracy_p: " + str(accuracy_p))
                t = tlist[i]

        #print(str(acc_prev_tree) + " " + str(accuracy_p))

    if (d.check(temp, validation) > d.check(t, validation)):
        t = temp
    """ 
    print(t)
    print("Final accuracy: " + str(d.check(t, validation)))
    pyqt.drawTree(t) 
    """
    return t
Example #24
def prunedtree(data,fraction):
    trainset, validationSet = partition(data, fraction)
    tree = d.buildTree(trainset, m.attributes)
    bestTreeSoFar = tree
    bestPerformance = d.check(tree, validationSet)
    print("Pruning with fraction = " + str(fraction) +
          " and performance on new validationSet = " + str(bestPerformance))
    return bestTreeSoFar, bestPerformance
Example #25
def pruning(data_set, fraction = 0.6):
    # A function that returns a pruned decision tree from a data set
    data_train, data_val = partition(data_set, fraction)

    # The tree to become pruned
    tree_pruned = dtree.buildTree(data_train, m.attributes)
    err_tree_pru = dtree.check(tree_pruned, data_val)
#    print("Tree before prune:")
#    print(tree_pruned)

    better = True
    while better:
        better = False
        trees_alt = dtree.allPruned(tree_pruned)
        best_prune = None
        err_best = 0

        for alternative in trees_alt:
            err_alternative = dtree.check(alternative, data_val)

            if err_alternative >= err_tree_pru and err_alternative > err_best:
                best_prune = alternative
                err_best = err_alternative
                better = True

        if better:
            tree_pruned = best_prune
            err_tree_pru = err_best

    return tree_pruned
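A possible way to use pruning() and compare it against the unpruned tree, assuming the same dtree/m aliases and the MONK-1 test set (illustrative usage, not part of the original example):

# Hypothetical usage: compare test error before and after pruning on MONK-1.
full_tree = dtree.buildTree(m.monk1, m.attributes)
pruned_tree = pruning(m.monk1, fraction=0.6)
print("test error, full tree:  ", 1 - dtree.check(full_tree, m.monk1test))
print("test error, pruned tree:", 1 - dtree.check(pruned_tree, m.monk1test))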
Example #26
def assignment7():
    datasets = [m.monk1, m.monk3]
    test = [m.monk1test, m.monk3test]
    name = ['Monk1', 'Monk3']
    fractions = [i * .1 for i in range(3, 9)]

    runs = 50

    scores = []
    scores_numbers = []

    for dataset, testset, name in zip(datasets, test, name):
        datasetScore = []
        for fraction in fractions:
            results = []
            for _ in range(runs):
                monktrain, monkval = partition(dataset, fraction)
                tree = dtree.buildTree(monktrain, m.attributes)
                tree, score = getTree(tree, monkval)

                results.append(1 - dtree.check(tree, testset))

            datasetScore.append((mean(results), variance(results)))

        scores_numbers.append(datasetScore)

        # scores.append(f'Fraction: {fraction}\nMean: {mean(results)}\nVariance: {variance(results)}')

    return scores_numbers
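assignment7 calls mean, variance, partition and getTree without showing where they come from; mean and variance match the names exported by Python's statistics module, and getTree is presumably another prune-on-validation helper like those in the surrounding examples. The import below is an assumption:

# Assumed imports for assignment7(); `mean` and `variance` are the names
# provided by the standard library's statistics module.
from statistics import mean, variance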
Example #27
def buildtree():
    for i in range(len(trainingset)):
        tree = d.buildTree(trainingset[i].dataset, m.attributes)
        performanceOnTrainData = d.check(tree, trainingset[i].dataset)
        performanceOnTestData = d.check(tree, testset[i].dataset)
        print("Error of " + trainingset[i].name + " on " + testset[i].name + ": " + str(1 - performanceOnTestData))
        print("Error of " + trainingset[i].name + " on " + trainingset[i].name + ": " + str(1 - performanceOnTrainData))
Example #28
def assignment4_p1(data, attributes, fraction):
    trainData, validData = partition(data, fraction)
    dataTree = d.buildTree(trainData, attributes)
    orgErr = 1 - d.check(dataTree, validData)
    print("ORIGINAL ERR", orgErr)
    orgTree = dataTree
    bestPrunedTree = orgTree
    cont = True
    while cont:
        err = orgErr
        bestErrorRate = err
        prunedTrees = d.allPruned(bestPrunedTree)
        print(len(prunedTrees))
        for i in range(0, len(prunedTrees)):
            err = 1 - d.check(prunedTrees[i], validData)
            print(i, err)
            if err < bestErrorRate:
                bestErrorRate = err
                bestPrunedTree = prunedTrees[i]
                print("Best Error Rate:", bestPrunedTree, bestErrorRate)

        if bestErrorRate > orgErr:
            return orgTree
        elif bestPrunedTree == dataTree:
            # no pruning ever improved on the original tree
            return orgTree
        # else:
        # if bestPrunedTree == prunedTrees:
        # prunedTrees = d.allPruned(bestPrunedTree)

        orgTree = bestPrunedTree
        orgErr = bestErrorRate
Example #29
def pruneTree(dataset, testSet):
	
	fractions = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
	errorList = []

	for x in fractions:
		train, val = partition(dataset, x)
		theTree = tree.buildTree(train, data.attributes)

		list_of_trees = tree.allPruned(theTree)


		theBest = 1000
		bestTree = 0

		for t in list_of_trees:
			error = 1 - tree.check(t, val)

			if error < theBest:
				theBest = error
				bestTree = t
		draw.drawTree(bestTree)
		smallest_error_at_fraction = 1 - tree.check(bestTree, testSet)
		errorList.append(smallest_error_at_fraction)

		# print ("smalest error")
		# print (smallest_error_at_fraction)
		# print ("occured at fraction")
		# print (x)

	return errorList
Example #30
def tests(pair):
    tree=dtree.buildTree(pair[0], monkdata.attributes)
    return [
            pair[2],
            dtree.check(tree,pair[0]),
            dtree.check(tree,pair[1])
    ]
Example #31
def gen_validate_data(monkset, monktest, fraction):
    validation_values = []
    for x in range(1, 100):
        train, valid = partition(monkset, fraction)
        tree = d.buildTree(train, m.attributes)
        pruned = get_pruned(tree, valid)
        validation_values.append(1 - d.check(pruned, monktest))
    return validation_values
Example #32
def evaluate_fraction(data, fraction, monktest):
    #data = monkdata.monk1
    res = [None] * 2000
    for i in range(2000):
        monktrain, monkval = partition(data, fraction)
        t = dtree.buildTree(monktrain, monkdata.attributes)
        res[i] = 1 - dtree.check(prune(t, monkval), monktest)
    return res
Example #33
def evaluate_pruning():
  fractions = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
  monk1_pruned = []
  monk3_pruned = []


  for i in range(100):
    monk1_pruned.append(prune_trees(m.monk1, m.monk1test))
    monk3_pruned.append(prune_trees(m.monk3, m.monk3test))

  monk1_pruned = np.transpose(monk1_pruned)
  monk3_pruned = np.transpose(monk3_pruned)

  mean1 = np.mean(monk1_pruned, axis=1)
  mean3 = np.mean(monk3_pruned, axis=1)
  std1 = np.std(monk1_pruned, axis=1)
  std3 = np.std(monk3_pruned, axis=1)

  stat_table = PrettyTable(['Dataset/Stat', '0.3', '0.4', '0.5', '0.6', '0.7', '0.8'])
  stat_table.add_row(np.concatenate((['MONK-1 - MEAN'], np.around(mean1, decimals=6)), axis=0))
  stat_table.add_row(np.concatenate((['MONK-3 - MEAN'], np.around(mean3, decimals=6)), axis=0))
  stat_table.add_row(np.concatenate((['MONK-1 - STDEV'], np.around(std1, decimals=6)), axis=0))
  stat_table.add_row(np.concatenate((['MONK-3 - STDEV'], np.around(std3, decimals=6)), axis=0))
  print(stat_table)

  complete_tree1 = dt.buildTree(m.monk1, m.attributes)
  complete_tree3 = dt.buildTree(m.monk3, m.attributes)

  prn_table = PrettyTable(['Dataset', 'Error on Complete Tree', 'Error on Pruned Tree (mean)'])
  prn_table.add_row(['MONK-1', 1 - dt.check(complete_tree1, m.monk1test), np.amin(mean1)])
  prn_table.add_row(['MONK-3', 1 - dt.check(complete_tree3, m.monk3test), np.amin(mean3)])
  print(prn_table)

  plt.plot(fractions, mean1, color='#49abc2', marker='o', label="Means")
  plt.title("Mean Error vs Fractions on MONK-1")
  plt.xlabel("Fractions")
  plt.ylabel("Means of Error")
  plt.legend(loc='upper right', frameon=False)
  plt.show()

  plt.plot(fractions, mean3, color='#fe5f55', marker='o', label="Means")
  plt.title("Mean Error vs Fractions on MONK-3")
  plt.xlabel("Fractions")
  plt.ylabel("Means of Error")
  plt.legend(loc='upper right', frameon=False)
  plt.show()
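prune_trees is not shown; given that its results are transposed and then plotted against the six fractions, it presumably returns one pruned-tree test error per fraction. A sketch under that assumption, reusing the partition and pruneOnValidation helpers sketched under Example #1 (the signature is a guess):

def prune_trees(trainset, testset, fractions=(0.3, 0.4, 0.5, 0.6, 0.7, 0.8)):
    # One run: for each fraction, split the training data, build a tree,
    # prune it on the validation part and record the error on the test set.
    errors = []
    for fraction in fractions:
        train, val = partition(trainset, fraction)
        pruned = pruneOnValidation(dt.buildTree(train, m.attributes), val)
        errors.append(1 - dt.check(pruned, testset))
    return errors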
Example #34
def ass3():
    test = [mdata.monk1test, mdata.monk2test, mdata.monk3test]
    count = 0
    for dset in [mdata.monk1, mdata.monk2, mdata.monk3]:
        t = dtree.buildTree(dset, mdata.attributes)
        print("Training error for set " + str(count + 1) + ": " + str(1 - dtree.check(t, dset)))
        print("Test error for set " + str(count + 1) + ": " + str(1 - dtree.check(t, test[count])))
        count = count + 1
Example #35
def assignment4():
	print "--- Assignment 4 ---"
	print "Selecting the best fraction to divide training and validation sets for pruning"
	
	table = Texttable(max_width=100)
	table.add_row(["Dataset", "0.3", "0.4", "0.5", "0.6", "0.7", "0.8", "Benchmark"])
	for i in range(3):
		row = ["Monk-" + str(i+1)]
		for frac in [(x * 0.1) for x in range(3,9)]:
			train_set, valid_set = m.partition(monkdata[i], frac)
			base = d.buildTree(train_set,m.attributes)
			best = best_pruned(base,valid_set)
			true_perf = d.check(best[0],testdata[i])
			row += [true_perf]
		row += [d.check(d.buildTree(monkdata[i],m.attributes),testdata[i])]
		table.add_row(row)
	print table.draw()
	print					
Example #36
def getMean(data, testData, frac, iter):
    val = 0
    i = 0

    while i < iter:
        monktrain, monkval = partition(data, frac)
        t = d.buildTree(monktrain, m.attributes)
        val = val + pruneNow(t, monkval, testData)
        i = i + 1
    return val / iter
Example #37
def getClasification(dataset,fraction):
    monk1train, monk1val = partition(dataset,fraction)
    # Build on the training part, evaluate the prunings on the validation part
    testTree = tree.buildTree(monk1train,m.attributes)
    prunedTrees = tree.allPruned(testTree)
    pValue = 0
    bestTree = testTree
    for pruned in prunedTrees:
        if(tree.check(pruned,monk1val) > pValue):
            bestTree = pruned
            pValue = tree.check(pruned,monk1val)
    return pValue, bestTree
Example #38
def assignment3():
	print "--- Assignment 3 ---"
	print "Performance of the decision trees"
	table = Texttable(max_width=100)
	table.add_row(["Dataset", "Training", "Test"])
	for i in range(3):
		tree = d.buildTree(monkdata[i],m.attributes)
		perf = [d.check(tree, monkdata[i]), d.check(tree, testdata[i])]
		table.add_row(["Monk-" + str(i+1)] + perf)
	print table.draw()
	print
Example #39
def find_prunned(data_part, f_part):
    monk1train, monkvalue = partition(data_part, f_part)
    dtree = tree.buildTree(monk1train, dataset.attributes)
    prun_list = tree.allPruned(dtree)
    current_correctness = tree.check(dtree, monkvalue)
    for current_tree in prun_list:
        check_correctness = tree.check(current_tree, monkvalue)
        if check_correctness > current_correctness:
            current_correctness = check_correctness
            dtree = current_tree
    return dtree
Example #40
def main(argv): 
    
    print "Entropy Monk1: " + str(tree.entropy(m.monk1))
    print "Entropy Monk2: " + str(tree.entropy(m.monk2))
    print "Entropy Monk3: " + str(tree.entropy(m.monk3))
    
    print "Average Gain Monk1(a1): " + str(tree.averageGain(m.monk1, m.attributes[0])) 
    print "Average Gain Monk1(a2): " + str(tree.averageGain(m.monk1, m.attributes[1]))
    print "Average Gain Monk1(a3): " + str(tree.averageGain(m.monk1, m.attributes[2]))
    print "Average Gain Monk1(a4): " + str(tree.averageGain(m.monk1, m.attributes[3]))
    print "Average Gain Monk1(a5): " + str(tree.averageGain(m.monk1, m.attributes[4]))
    print "Average Gain Monk1(a6): " + str(tree.averageGain(m.monk1, m.attributes[5]))
    
    print "Average Gain Monk2(a1): " + str(tree.averageGain(m.monk2, m.attributes[0])) 
    print "Average Gain Monk2(a2): " + str(tree.averageGain(m.monk2, m.attributes[1]))
    print "Average Gain Monk2(a3): " + str(tree.averageGain(m.monk2, m.attributes[2]))
    print "Average Gain Monk2(a4): " + str(tree.averageGain(m.monk2, m.attributes[3]))
    print "Average Gain Monk2(a5): " + str(tree.averageGain(m.monk2, m.attributes[4]))
    print "Average Gain Monk2(a6): " + str(tree.averageGain(m.monk2, m.attributes[5]))
    
    print "Average Gain Monk3(a1): " + str(tree.averageGain(m.monk3, m.attributes[0])) 
    print "Average Gain Monk3(a2): " + str(tree.averageGain(m.monk3, m.attributes[1]))
    print "Average Gain Monk3(a3): " + str(tree.averageGain(m.monk3, m.attributes[2]))
    print "Average Gain Monk3(a4): " + str(tree.averageGain(m.monk3, m.attributes[3]))
    print "Average Gain Monk3(a5): " + str(tree.averageGain(m.monk3, m.attributes[4]))
    print "Average Gain Monk3(a6): " + str(tree.averageGain(m.monk3, m.attributes[5]))
    
    #print "Average Gain Level 2 Monk1(a1): " + str(tree.averageGain(tree.select(m.monk1, m.attributes[0], value), m.attributes[0])) 
    #draw.drawTree(tree.buildTree(m.monk1, m.attributes, 2))

    t=tree.buildTree(m.monk1,m.attributes);
    print(tree.check(t, m.monk1test))
    print(tree.check(t, m.monk1))
    
    t2=tree.buildTree(m.monk2,m.attributes);
    print(tree.check(t2, m.monk2test))
    print(tree.check(t2, m.monk2))
    
    t3=tree.buildTree(m.monk3,m.attributes);
    print(tree.check(t3, m.monk3test))
    print(tree.check(t3, m.monk3))
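The long block of repeated entropy and average-gain prints above can be generated with a loop instead; an equivalent, more compact form using the same tree/m aliases (print is called as a function with a single argument, so it runs under both Python 2 and 3):

datasets = [("Monk1", m.monk1), ("Monk2", m.monk2), ("Monk3", m.monk3)]
for name, dataset in datasets:
    print("Entropy " + name + ": " + str(tree.entropy(dataset)))
for name, dataset in datasets:
    for i, attribute in enumerate(m.attributes):
        # a1..a6 are the six MONK attributes
        print("Average Gain " + name + "(a" + str(i + 1) + "): " +
              str(tree.averageGain(dataset, attribute)))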
Example #41
def generateErrorTable(dataset, testset, fractions, tries):
    result=[]	
    for x in fractions:
        acc = 0
        for i in range(tries):
            trainSet, valSet =partition(dataset, x)

            tree = dtree.buildTree(trainSet, m.attributes)
            prunedTree = findBestPrune(tree, valSet)
            acc += dtree.check(prunedTree, testset)
        result.append( (x,acc / tries) )
    return result
Example #42
def test_pruning(dataset, testset):
    fraction_list = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
    print ("TESTING PRUNING")
    for fraction in fraction_list:
        print("--------------")
        print(fraction)
        training, validation = partition(dataset, fraction)
        monk_tree = d.buildTree(training, m.attributes)
        pruned_monk_tree = prune_tree(monk_tree,validation)
        print(d.check(monk_tree, testset))
        print(d.check(pruned_monk_tree, testset))
        print("--------------")
Example #43
def prune():
  print "\n------------------------------\nAssignment 4 - Pruning\n------------------------------"
  print "Dataset\t  0.3\t\t  0.4\t\t  0.5\t\t  0.6\t\t  0.7\t\t  0.8"
  partSizes = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]
  r = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
  i = 0
  for size in partSizes:  
    for j in range(100):
      training, test = partition(data.monk1, size)
      bestTree = dt.buildTree(training, data.attributes)
      bestClass = dt.check(bestTree, test)
      better = True
      while better:
        better = False
        for subTree in dt.allPruned(bestTree):
          if dt.check(subTree, test) > bestClass:
            bestTree = subTree
            bestClass = dt.check(subTree, test)
            better = True
      r[i] += (1-dt.check(bestTree, data.monk1test))
    i += 1
  print "Monk1\t%0.6f\t%0.6f\t%0.6f\t%0.6f\t%0.6f\t%0.6f\t" % (r[0]/100, r[1]/100, r[2]/100, r[3]/100, r[4]/100, r[5]/100)
  r = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
  i = 0
  for size in partSizes:  
    for j in range(100):
      training, test = partition(data.monk3, size)
      bestTree = dt.buildTree(training, data.attributes)
      bestClass = dt.check(bestTree, test)
      better = True
      while better:
        better = False
        for subTree in dt.allPruned(bestTree):
          if dt.check(subTree, test) >= bestClass:
            bestTree = subTree
            bestClass = dt.check(subTree, test)
            better = True
      r[i] += (1-dt.check(bestTree, data.monk3test))
    i += 1
  print "Monk3\t%0.6f\t%0.6f\t%0.6f\t%0.6f\t%0.6f\t%0.6f\t" % (r[0]/100, r[1]/100, r[2]/100, r[3]/100, r[4]/100, r[5]/100)
Example #44
def make_pruned(dataset, testset, ratio = 0.5):
        '''
        Takes data- and testset and partitions data. 
        Then makes pruned tree and checks performance
        '''
        test, val = partition(dataset, ratio)
        tree = dt.buildTree(test, m.attributes)
        #per_ref = check_tree_performance(tree, testset)
        per_ref = check_tree_performance(tree, val)
        best = prune(tree, val, per_ref)
        #per_pruned = check_tree_performance(best, testset)
        per_pruned = check_tree_performance(best, val)
        return best, per_ref, per_pruned
Example #45
def best_pruned_tree(dataset, fraction):
    train, val = partition(dataset, fraction)
    tree = dt.buildTree(train, m.attributes)
    improved = True
    while improved:
        improved = False
        best_performance = dt.check(tree, val)
        for pruned_tree in dt.allPruned(tree):
            performance = dt.check(pruned_tree, val)
            if performance > best_performance:
                best_performance = performance
                tree = pruned_tree
                improved = True
    return tree
Example #46
def main():
    # tree = d.buildTree(m.monk1,m.attributes)
    # draw.drawTree(tree)
    # Assignment 1
    print("==Ass 01==")
    calcentropy()

    # Assignment 2
    print("==Ass 02==")
    calcgain()

    # Assignment 3.1
    print("==Ass 03.1==")
    mytree = buildMonk1DecisionTreeTo2ndLevel()
    prebuildttree = d.buildTree(m.monk1, m.attributes, 2)
    print(mytree)
    print(prebuildttree)
    # draw.drawTree(mytree)

    # Assignment 3.2
    print("==Ass 03.2==")
    buildAndCheckDecisionTreeForDatasets(m.monk1, m.monk1test, "Monk 1")
    buildAndCheckDecisionTreeForDatasets(m.monk2, m.monk2test, "Monk 2")
    buildAndCheckDecisionTreeForDatasets(m.monk3, m.monk3test, "Monk 3")

    print("==Ass 04==")
    trainingdatapercentage = [.3, .4, .5, .6, .7, .8]
    treeerrormonk1 = []
    treeerrormonk3 = []
    tries = 1000
    for fraction in trainingdatapercentage:
        for dataset in [m.monk1, m.monk3]:
            value = 0.0
            for iteration in range(0, tries):
                value += pruneDecisionTree(dataset, m.attributes, fraction)
            value /= tries
            value = round(value, 4)
            if dataset == m.monk1:
                treeerrormonk1.append(value)
            else:
                treeerrormonk3.append(value)

    print("Errors for fractions")
    print(trainingdatapercentage)
    print(treeerrormonk1)
    print(treeerrormonk3)
Example #47
def pruning( trainingSet, testSet, fraction ):
  train1, train2 = partition( trainingSet, fraction )

  bestTree = dT.buildTree( train1, m.attributes )
  bestTreePerf = dT.check( bestTree, train2 )
  bestTreeFound = True

  while bestTreeFound == True:
    bestTreeFound = False

    prunedTrees = dT.allPruned( bestTree )

    for candidateTree in prunedTrees:

      if dT.check( candidateTree, train2 ) >= bestTreePerf:
        bestTree = candidateTree
        bestTreePerf = dT.check( candidateTree, train2 )
        bestTreeFound = True

  return dT.check( bestTree, testSet )
Example #48
def assignment4helper(dataset, fraction):
    monk1train, monk1val = partition(dataset, fraction)
    tree = d.buildTree(monk1train, m.attributes)

    # Start from the unpruned tree's validation score so pruning only happens
    # when it actually helps
    bestTree = tree
    maxVal = d.check(tree, monk1val)
    cont = True
    i = 0
    while (cont):
        cont = False
        i += 1
        for t in d.allPruned(tree):
            val = d.check(t, monk1val)
            if (val > maxVal):
                cont = True
                bestTree = t
                maxVal = val
        tree = bestTree
    # print("#iterations: %d" % i)
    return tree
Example #49
def calc_next_level():
  #print "\nAverage gain when a5 is choosen"
  print "\nA5\t  a1\t\t  a2\t\t  a3\t\t  a4\t\t  a5\t\t  a6"
  s = "A5(" 
  for val in data.attributes[4].values:
    subset = dt.select(data.monk1, data.attributes[4], val)
    t = "\t"
    for attr in data.attributes: 
      t = t + "%.6f\t" % (dt.averageGain(subset, attr))
    print val , t
    best = dt.bestAttribute(subset, data.attributes)
    s = s + best.name + "("
    #print "best attribute: ", best.name
    for value in best.values:
      #print "choose: ", value, "mostCommon: ", dt.mostCommon(dt.select(subset, best, value))
      if(dt.mostCommon(dt.select(subset, best, value))): 
        s = s + "+"
      else:
        s = s + "-"
    s = s + ")"
  s = s + ")"
  print "\nOur tree:\t", s
  print "Build tree:\t", dt.buildTree(data.monk1, data.attributes, 2)
Example #50
def assignment3():
    print("Monk1")
    monk1Tree = d.buildTree(m.monk1, m.attributes)
    print(1 - d.check(monk1Tree, m.monk1))
    print(1 - d.check(monk1Tree, m.monk1test))
    print(monk1Tree)

    print("Monk2")
    monk2Tree = d.buildTree(m.monk2, m.attributes)
    print(1 - d.check(monk2Tree, m.monk2))
    print(1 - d.check(monk2Tree, m.monk2test))
    print(monk2Tree)

    print("Monk3")
    monk3Tree = d.buildTree(m.monk3, m.attributes)
    print(1 - d.check(monk3Tree, m.monk3))
    print(1 - d.check(monk3Tree, m.monk3test))
    print(monk3Tree)

    print("Monk1 --  2 Levels")
    monk1Tree = d.buildTree(m.monk1, m.attributes, 2)
    print(1 - d.check(monk1Tree, m.monk1))
    print(1 - d.check(monk1Tree, m.monk1test))
    print(monk1Tree)

    print("Monk2 --  2 Levels")
    monk2Tree = d.buildTree(m.monk2, m.attributes, 2)
    print(1 - d.check(monk2Tree, m.monk2))
    print(1 - d.check(monk2Tree, m.monk2test))
    print(monk2Tree)

    print("Monk3 --  2 Levels")
    monk3Tree = d.buildTree(m.monk3, m.attributes, 2)
    print(1 - d.check(monk3Tree, m.monk3))
    print(1 - d.check(monk3Tree, m.monk3test))
    print(monk3Tree)
Example #51
    breakPoint= int(len(ldata) * fraction)
    return ldata[:breakPoint], ldata[breakPoint:]


def unzip(values):
    return [list(t) for t in zip(*values)]

fractions = [0.3,0.4,0.5,0.6,0.7,0.8]
series=[]
for pair in setpairs:
    values = []
    for fraction in fractions:
        s = pair[0]
        testdata = pair[1]
        training, validation = partition(s, fraction)
        tree=dtree.buildTree(training, monkdata.attributes)
        keepPruning = True
        while keepPruning:
            alternatives = dtree.allPruned(tree)
            keepPruning = False
            for alternative in alternatives:
                if(dtree.check(alternative,validation) > dtree.check(tree,validation)):
                    tree = alternative
                    keepPruning = True
        error=dtree.check(tree,testdata)
        values.append((fraction,error))
    #convert pairs to two lists [xs, ys]
    data=unzip(values)
    data.append(pair[2])
    series.append(data)
Example #52
#splitting the data
a = bestAttribute(m.monk1, m.attributes)
data = []
for v in a.values:
    data.append(dt.select(m.monk1, a, v))

#calculating the average information gain for the next level
for d in data:
    for a in m.attributes:
        print dt.averageGain(d, a)
    print '\n'
print '\n' 

#comparison with the tree from the predefined function
tree = dt.buildTree(m.monk1, m.attributes, 2)
#draw.drawTree(tree)


#building the trees for all the monks datasets
#assignment 3
tree1 = dt.buildTree(m.monk1, m.attributes)
print dt.check(tree1, m.monk1)
print dt.check(tree1, m.monk1test)
#draw.drawTree(tree)
print '\n'

tree2 = dt.buildTree(m.monk2, m.attributes)
print dt.check(tree2, m.monk2)
print dt.check(tree2, m.monk2test)
#draw.drawTree(tree)
Example #53
def print_non_pruned_performance(training_set, test_set):
    non_pruned_tree = dt.buildTree(training_set, m.attributes)
    performance_without_pruning = dt.check(non_pruned_tree, test_set)
    print('Performance without pruning: {}'.format(performance_without_pruning))
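A possible call site for print_non_pruned_performance, looping over the three MONK train/test pairs (illustrative usage; assumes the same dt/m aliases):

# Hypothetical usage: report the unpruned performance for each dataset.
for training_set, test_set in [(m.monk1, m.monk1test),
                               (m.monk2, m.monk2test),
                               (m.monk3, m.monk3test)]:
    print_non_pruned_performance(training_set, test_set)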
Example #54
  gain_partition3.append(dt.averageGain(partition3,m.attributes[x]))
  gain_partition4.append(dt.averageGain(partition4,m.attributes[x]))

print "Dataset\tA1\t\tA2\t\tA3\t\tA4\t\tA5\t\tA6"
print "Part 1: ","\t".join(["%.7f"%y for y in gain_partition1])
print "Part 2: ","\t".join(["%.7f"%y for y in gain_partition2])
print "Part 3: ","\t".join(["%.7f"%y for y in gain_partition3])
print "Part 4: ","\t".join(["%.7f"%y for y in gain_partition4])

print
print "Own tree"
print "A5(",dt.mostCommon(partition1),"A4(",dt.mostCommon(partition2),")","A6",dt.mostCommon(partition3),")","A1(",dt.mostCommon(partition4), "))" 

print
print "BuildTree function"
print dt.buildTree(m.monk1,m.attributes,2)
#draw.drawTree(dt.buildTree(m.monk1,m.attributes,2))


print
print "Building Trees"
t1 = dt.buildTree(m.monk1,m.attributes)
t2 = dt.buildTree(m.monk2,m.attributes)
t3 = dt.buildTree(m.monk3,m.attributes)
print "Checking Full Tree"
print "Dataset\tE train\t\tE test"
print "Monk1\t","%.7f"%dt.check(t1,m.monk1), "\t%.7f"%dt.check(t1,m.monk1test)
print "Monk1\t","%.7f"%dt.check(t2,m.monk2), "\t%.7f"%dt.check(t2,m.monk2test)
print "Monk1\t","%.7f"%dt.check(t3,m.monk3), "\t%.7f"%dt.check(t3,m.monk3test)

Example #55
    d.averageGain(m.monk2, m.attributes[0]), d.averageGain(m.monk2, m.attributes[1]),
    d.averageGain(m.monk2, m.attributes[2]), d.averageGain(m.monk2, m.attributes[3]),
    d.averageGain(m.monk2, m.attributes[4]), d.averageGain(m.monk2, m.attributes[5])
))

print("monk-3: %f %f %f %f %f %f" % (
    d.averageGain(m.monk3, m.attributes[0]), d.averageGain(m.monk3, m.attributes[1]),
    d.averageGain(m.monk3, m.attributes[2]), d.averageGain(m.monk3, m.attributes[3]),
    d.averageGain(m.monk3, m.attributes[4]), d.averageGain(m.monk3, m.attributes[5])
))

monk1_subset = d.select(m.monk1, m.attributes[4], 3)

print len(monk1_subset)
print(d.mostCommon(monk1_subset))
monk1_subset_tree = d.buildTree(monk1_subset, m.attributes, 5)
print(monk1_subset_tree)

t1 = d.buildTree(m.monk1, m.attributes);
print(d.check(t1, m.monk1test))
print(d.check(t1, m.monk1))

t2 = d.buildTree(m.monk2, m.attributes);
print(d.check(t2, m.monk2test))
print(d.check(t2, m.monk2))

t3 = d.buildTree(m.monk3, m.attributes);
print(d.check(t3, m.monk3test))
print(d.check(t3, m.monk3))

Example #56
def assignment3_p2():
    print("\n#####Start Assignment 3 part 2")
    splits = myBuildTree(m.monk1, 2)
    print("splits", splits)
    print(d.buildTree(m.monk1, m.attributes, 2))
print "Gain Monk1 a5(3) - a5: " + str(tree.averageGain(tree.select(m.monk1, m.attributes[4], 3),m.attributes[4]))
print "Gain Monk1 a5(3) - a6: " + str(tree.averageGain(tree.select(m.monk1, m.attributes[4], 3),m.attributes[5]))

print "Gain Monk1 a5(4) - a1: " + str(tree.averageGain(tree.select(m.monk1, m.attributes[4], 4),m.attributes[0]))
print "Gain Monk1 a5(4) - a2: " + str(tree.averageGain(tree.select(m.monk1, m.attributes[4], 4),m.attributes[1]))
print "Gain Monk1 a5(4) - a3: " + str(tree.averageGain(tree.select(m.monk1, m.attributes[4], 4),m.attributes[2]))
print "Gain Monk1 a5(4) - a4: " + str(tree.averageGain(tree.select(m.monk1, m.attributes[4], 4),m.attributes[3]))
print "Gain Monk1 a5(4) - a5: " + str(tree.averageGain(tree.select(m.monk1, m.attributes[4], 4),m.attributes[4]))
print "Gain Monk1 a5(4) - a6: " + str(tree.averageGain(tree.select(m.monk1, m.attributes[4], 4),m.attributes[5]))

selec1 = tree.select(m.monk1, m.attributes[4], 4)
print "Most Common Level2 Monk1(1): " + str(tree.mostCommon(tree.select(selec1,m.attributes[1],1)))
print "Most Common Level2 Monk1(2): " + str(tree.mostCommon(tree.select(selec1,m.attributes[1],2)))
print "Most Common Level2 Monk1(3): " + str(tree.mostCommon(tree.select(selec1,m.attributes[1],3)))

print "Monk 1 Etrain : " + str(tree.check(tree.buildTree(m.monk1, m.attributes), m.monk1))
print "Monk 1 Etest  : " + str(tree.check(tree.buildTree(m.monk1, m.attributes), m.monk1test))
print "Monk 2 Etrain : " + str(tree.check(tree.buildTree(m.monk2, m.attributes), m.monk2))
print "Monk 2 Etest  : " + str(tree.check(tree.buildTree(m.monk2, m.attributes), m.monk2test))
print "Monk 3 Etrain : " + str(tree.check(tree.buildTree(m.monk3, m.attributes), m.monk3))
print "Monk 3 Etest  : " + str(tree.check(tree.buildTree(m.monk3, m.attributes), m.monk3test))

print "ID3 built tree : \n"
tree1 = tree.buildTree(m.monk1,m.attributes,2)
#d.drawTree(tree1)

#x = [0.3,0.4,0.5,0.6,0.7,0.8]
#y = []
#for fraction in x:
#    monk1train, monk1val = partition(m.monk1,fraction)
#    testTree = tree.buildTree(monk1val,m.attributes)
Example #58
# print(sel)
sub = []
mC = []
for subset in sel:
    for i in [0, 1, 2, 3, 5]:
        sub.append(t.averageGain(subset, m.attributes[i]))
    mC.append(t.mostCommon(subset))

    # print(sub)
    sub = []

"Highest information gain on second level of the tree # 2 - A4 , 3 - A6 , 4 - A1 #"

"""Assignment 3"""
tree1 = t.buildTree(m.monk1, m.attributes)
tree2 = t.buildTree(m.monk2, m.attributes)
tree3 = t.buildTree(m.monk3, m.attributes)

draw.drawTree(tree1)
# draw.drawTree(tree2)
# draw.drawTree(tree3)

print("Assignment 3: Decision tree performances")

print("Train errors:")
print(1 - round(t.check(tree1, m.monk1), 5))
print(1 - round(t.check(tree2, m.monk2), 5))
print(1 - round(t.check(tree3, m.monk3), 5))

print("Test errors:")
Example #59
import monkdata as m
import dtree as d

t = d.buildTree(m.monk1, m.attributes)
print('monk1')
print(d.check(t, m.monk1test))
print(d.check(t, m.monk1))
print()

print('monk2')
t = d.buildTree(m.monk2, m.attributes)
print(d.check(t, m.monk2test))
print(d.check(t, m.monk2))
print()

print('monk3')
t = d.buildTree(m.monk3, m.attributes)
print(d.check(t, m.monk3test))
print(d.check(t, m.monk3))

Example #60
            currentgain = d.check(prunedTrees[x], validation)
            #print("Rate for tree %d: %f " % (x + 1, currentgain))
            if(currentgain > maxgain):
                maxgain = currentgain;
                bestTree = prunedTrees[x]

        prunedTrees = d.allPruned(bestTree)

        if(maxgain > bestGain):
            bestGain = maxgain
        else:
            run = False
            #print("Max accuracy reached. Pruning stopped.")
            #print("Best accuracy: %f" % bestGain);

    return bestTree

i = 1;
for set in monkset:
    print("Pruning for MONK-%d" % (monkset.index(set) + 1));
    for frac in fractions:
        print("Fraction: %f" % frac)
        newmonk, monkval = partition(set, frac)
        monktree = d.buildTree(newmonk, m.attributes)
        t = pruneTree(monktree, monkval)
        print("Accuracy for pruned tree against test data: %f (vs nonpruned: %f)" \
            % (d.check(t, monktestset[monkset.index(set)]),
               d.check(monktree, monktestset[monkset.index(set)])))
    i += 1;
    print()
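The snippet above is cut off at the top: the head of pruneTree and the module-level names monkset, monktestset and fractions are missing and are left as-is. For orientation, a hedged sketch of what that missing setup probably looks like (the exact lists are a guess; the code indexes monkset to label MONK-1, MONK-2, and so on):

# Assumed module-level definitions for the fragment above; inferred from
# how the names are used, not taken from the original file.
monkset = [m.monk1, m.monk2, m.monk3]
monktestset = [m.monk1test, m.monk2test, m.monk3test]
fractions = [0.3, 0.4, 0.5, 0.6, 0.7, 0.8]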