def run(config, codeRoot=''):
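    """Grade a depth-1 split: train with the error metric and maxdepth=1 on each
    dataset, then check that the root splits on the expected column and that the
    resulting tree has exactly one level."""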
    classifier = DecisionTreeClassifier([0, 1])
    print " ========= %s ========= " % config['header']
    pointsPerCase = int(config['pointsPerCase'])

    args = {}
    args['metric'] = metrics.error
    args['maxdepth'] = 1

    size = len(config['dataset'])
    points = 0
    first_error = ''
    for i in xrange(size):
        data = config['dataset'][i]
        print "\tDataset: %s" % data
        # Load data
        trainingData = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/training_data.csv" % data))
        trainingLabels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/training_labels.csv" % data))

        # Conduct training and testing
        classifier.train(trainingData, trainingLabels, args)
        tree = classifier.tree
        best_split = int(config['best_split'][i])
        test_passed = True
        if tree.column != best_split:
            error = 'WRONG SPLIT: Correct: %s , Returned: %s' % (best_split,
                                                                 tree.column)
            if not first_error: first_error = error
            print '\t\t%s' % error
            test_passed = False

        if not has_one_level(tree):
            error = 'The tree does not have one level'
            if not first_error: first_error = error
            print '\t\t%s' % error
            test_passed = False

        if test_passed:
            points += pointsPerCase
            print '\t\tOK'

    needed_all_tests_passed = int(config['totalQuestion']) > 0
    if needed_all_tests_passed and first_error:
        points = 0
        test_error = 'Your code does not pass all tests'
    else:
        test_error = ''

    return (points, test_error)
def run(config, codeRoot=''):
    """Grade the split metrics: evaluate each configured metric function on the
    given test cases and compare the result against the expected value."""
    classifier = DecisionTreeClassifier([0, 1])
    print " ========= %s ========= " % config['header']
    pointsPerCase = int(config['pointsPerCase'])

    num_cases = len(config['case'])
    points = 0
    first_error = ''
    for metric in config['metric']:
        mymet = get_metric(metric)
        print "\tMetric: %s" % metric
        results = [float(val) for val in config['results_%s' % metric]]
        for i in xrange(num_cases):
            data = eval(config['case'][i])
            print "\t\tCase: %s" % data
            resp = mymet(data)

            if compare(resp, results[i]) == 0:
                points += pointsPerCase
                print '\t\t\tOK'
            else:
                error = 'ERROR: Correct: %s, Returned: %s' % (results[i], resp)
                print '\t\t\t%s' % error
                if not first_error: first_error = error

    # Check whether all tests must pass for any credit
    needed_all_tests_passed = int(config['totalQuestion']) > 0
    if needed_all_tests_passed and first_error:
        points = 0
        test_error = 'Your code does not pass all tests'
    else:
        test_error = ''
    return (points, test_error)
def run(config, codeRoot=''):
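    """Grade end-to-end classification: train on each dataset with the configured
    metric and maxdepth, classify the test set, and award full points when the
    classifier meets the required accuracy (half points otherwise)."""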
    classifier = DecisionTreeClassifier([0, 1])
    print " ========= %s ========= " % config['header']
    pointsPerCase = int(config['pointsPerCase'])

    args = {}
    args['metric'] = get_metric(config['metric'])

    size = len(config['dataset'])
    points = 0
    first_error = ''
    for i in xrange(size):
        data = config['dataset'][i]
        print "\tDataset: %s" % data
        # Load data
        trainingData = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/training_data.csv" % data))
        trainingLabels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/training_labels.csv" % data))

        # Load test
        testData = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/test_data.csv" % data))
        testLabels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/test_labels.csv" % data))

        # Conduct training and testing
        args['maxdepth'] = int(config['maxdepth'][i])
        classifier.train(trainingData, trainingLabels, args)
        guesses = classifier.classify(testData)
        min_accuracy_required = float(config['accuracy'][i])
        if is_good_classifier(guesses, testLabels, min_accuracy_required):
            print '\t\tOK'
            points += pointsPerCase
        else:
            # A most-frequent-label baseline does at least as well: half credit,
            # and record the failure so the all-tests-passed check can apply.
            error = 'Most frequent classifier is better'
            if not first_error: first_error = error
            points += (pointsPerCase / 2.)
            print '\t\t%s' % error

    needed_all_tests_passed = int(config['totalQuestion']) > 0
    if needed_all_tests_passed and first_error:
        points = 0
        test_error = 'Your code does not pass all tests'
    else:
        test_error = ''

    return (points, test_error)
def run(config, codeRoot=''):
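    """Grade the base case: with maxdepth=0 the trained tree must be a single
    leaf whose label is the most frequent label in the training data."""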
    classifier = DecisionTreeClassifier([0, 1])
    print " ========= %s ========= " % config['header']

    args = {}
    args['metric'] = metrics.error
    args['maxdepth'] = 0

    size = len(config['dataset'])
    points = 0
    first_error = ''
    for i in xrange(size):
        data = config['dataset'][i]
        print "\tDataset: %s" % data
        # Load data
        trainingData = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/training_data.csv" % data))
        trainingLabels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/training_labels.csv" % data))

        # Conduct training and testing
        classifier.train(trainingData, trainingLabels, args)
        tree = classifier.tree
        if not tree.leftchild and not tree.rightchild:
            label = int(config['label'][i])
            if tree.label != label:
                error = 'The tree is not classifying by the most frequent label: %s' % label
                if not first_error: first_error = error
                print '\t\t%s' % error
            else:
                points += int(config['pointsPerCase'])
                print '\t\tOK'
        else:
            error = 'The tree has one or more levels'
            if not first_error: first_error = error
            print '\t\t%s' % error
    needed_all_tests_passed = int(config['totalQuestion']) > 0
    if needed_all_tests_passed and first_error:
        points = 0
        test_error = 'Your code does not pass all tests'
    else:
        test_error = ''

    return (points, test_error)
def run(config, codeRoot=''):
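    """Grade full tree construction: train on each dataset and compare the
    resulting tree against the reference tree loaded by get_tree()."""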
    classifier = DecisionTreeClassifier([0, 1])
    print " ========= %s ========= " % config['header']
    pointsPerCase = int(config['pointsPerCase'])

    args = {}
    args['metric'] = get_metric(config['metric'])
    args['maxdepth'] = int(config['maxdepth'])

    size = len(config['dataset'])
    points = 0
    first_error = ''
    for i in xrange(size):
        data = config['dataset'][i]
        print "\tDataset: %s" % data
        # Load data
        trainingData = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/training_data.csv" % data))
        trainingLabels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/training_labels.csv" % data))

        # Conduct training and testing
        classifier.train(trainingData, trainingLabels, args)
        tree = classifier.tree
        status, error = compare(tree, get_tree(data, codeRoot))
        if status != 0:
            if not first_error: first_error = error
            print '\t\t%s' % error
        else:
            points += pointsPerCase
            print '\t\tOK'

    needed_all_tests_passed = int(config['totalQuestion']) > 0
    if needed_all_tests_passed and first_error:
        points = 0
        test_error = 'Your code does not pass all tests'
    else:
        test_error = ''

    return (points, test_error)
def run(config, codeRoot=''):
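    """Grade the isLeaf() stopping conditions: empty dataset, all instances
    sharing one label, all instances identical, and the maximum-depth limit."""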
    classifier = DecisionTreeClassifier([0, 1])
    print " ========= %s ========= " % config['header']
    pointsPerCase = int(config['pointsPerCase'])

    size = len(config['data'])
    points = 0
    first_error = ''
    # Each case exercises one stopping condition: (description, depth passed to isLeaf)
    cases = [
        ('Check if dataset is empty', 2),
        ('Check if all instances have same label', 2),
        ('Check if all instances are the same', 2),
        ('Check maximum depth parameter', 3),
    ]
    for i, (description, depth) in enumerate(cases):
        data = eval(config['data'][i])
        labels = eval(config['label'][i])
        maxdepth = int(config['maxdepth'][i])
        classifier.maxdepth = maxdepth
        print '\tCase #%d: DATA(%s), LABELS(%s)' % (i + 1, data, labels)
        if classifier.isLeaf(data, labels, depth):
            points += pointsPerCase
            print '\t\tOK'
        else:
            error = 'WRONG: %s' % description
            if description == 'Check maximum depth parameter':
                error += '. Maximum: %s, Current: %s' % (maxdepth, depth)
            if not first_error: first_error = error
            print '\t\t%s' % error

    needed_all_tests_passed = int(config['totalQuestion']) > 0
    if needed_all_tests_passed and first_error:
        points = 0
        test_error = 'Your code does not pass all tests'
    else:
        test_error = ''

    return (points, test_error)
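
# Standalone demo: train the custom DecisionTreeClassifier on the iris dataset,
# pretty-print whatever fit() returns (presumably the learned tree), and report
# accuracy on a held-out 10% test split. Note this snippet uses Python 3 print
# syntax, unlike the Python 2 graders above.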
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from decisionTree import DecisionTreeClassifier
from pprint import pprint
import numpy as np


def accuracy(y_true, y_pred):
    accuracy = np.sum(y_true == y_pred) / len(y_true)
    return accuracy


iris = load_iris()

X = iris.data
y = iris.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

clf = DecisionTreeClassifier(max_depth=7, feature_names=iris.feature_names)

m = clf.fit(X_train, y_train)

pprint(m)

predictions = clf.predict(X_test)

print('Test accuracy:', accuracy(y_test, predictions))