def run(config, codeRoot=''):
    """Grade the root split chosen by a depth-1 decision tree.

    For every entry of ``config['dataset']`` the classifier is trained on
    ``data/<dataset>/training_data.csv`` and ``training_labels.csv`` (both
    resolved under ``codeRoot``) with ``metrics.error`` and ``maxdepth`` 1.
    A dataset earns ``config['pointsPerCase']`` points when the trained
    tree's ``column`` equals ``config['best_split'][i]`` and
    ``has_one_level(tree)`` is truthy.

    Returns a ``(points, test_error)`` tuple.  When
    ``int(config['totalQuestion']) > 0`` and any check failed, the result is
    ``(0, 'Your code does not pass all tests')``; otherwise ``test_error``
    is the empty string.
    """
    classifier = DecisionTreeClassifier([0, 1])
    print(" ========= %s ========= " % config['header'])
    pointsPerCase = int(config['pointsPerCase'])
    train_args = {'metric': metrics.error, 'maxdepth': 1}

    points = 0
    first_error = ''
    for idx, data in enumerate(config['dataset']):
        print("\tDataset: %s" % data)

        # Load the training split for this dataset.
        data_path = os.path.join(
            codeRoot, "data/%s/training_data.csv" % data)
        trainingData = samples.loadDataFile(data_path)
        labels_path = os.path.join(
            codeRoot, "data/%s/training_labels.csv" % data)
        trainingLabels = samples.loadLabelsFile(labels_path)

        # Train, then inspect the resulting tree.
        classifier.train(trainingData, trainingLabels, train_args)
        tree = classifier.tree
        best_split = int(config['best_split'][idx])

        failed = False
        if tree.column != best_split:
            error = 'WRONG SPLIT: Correct: %s , Returned: %s' % (
                best_split, tree.column)
            # Only the first failure seen across all datasets is remembered.
            first_error = first_error or error
            print('\t\t%s' % error)
            failed = True
        if not has_one_level(tree):
            error = 'The tree does not have one level'
            first_error = first_error or error
            print('\t\t%s' % error)
            failed = True

        if not failed:
            points += pointsPerCase
            print('\t\tOK')

    # A positive 'totalQuestion' makes the question all-or-nothing.
    must_pass_all = int(config['totalQuestion']) > 0
    if must_pass_all and first_error:
        return (0, 'Your code does not pass all tests')
    return (points, '')
def run(config, codeRoot=''):
    """Grade metric functions against the expected values in ``config``.

    For each name in ``config['metric']`` the function obtained from
    ``get_metric(name)`` is applied to every evaluated entry of
    ``config['case']``.  A case earns ``config['pointsPerCase']`` points when
    ``compare(result, expected) == 0``, where ``expected`` is the matching
    float from ``config['results_<name>']``.

    ``codeRoot`` is accepted but not used by this test.

    Returns a ``(points, test_error)`` tuple.  When
    ``int(config['totalQuestion']) > 0`` and any case failed, the result is
    ``(0, 'Your code does not pass all tests')``; otherwise ``test_error``
    is the empty string.
    """
    # The instance is never used below; construction is kept as in the
    # original in case the constructor itself has effects.
    classifier = DecisionTreeClassifier([0, 1])
    print(" ========= %s ========= " % config['header'])
    pointsPerCase = int(config['pointsPerCase'])
    cases = config['case']

    points = 0
    first_error = ''
    for metric in config['metric']:
        metric_fn = get_metric(metric)
        print("\tMetric: %s" % metric)
        expected = [float(raw) for raw in config['results_%s' % metric]]

        for idx, case_text in enumerate(cases):
            # NOTE(review): eval() runs whatever expression the config holds;
            # only safe while the config file is trusted.
            data = eval(case_text)
            print("\t\tCase: %s" % data)
            resp = metric_fn(data)

            if compare(resp, expected[idx]) != 0:
                error = 'ERROR: Correct: %s, Returned: %s' % (
                    expected[idx], resp)
                print('\t\t\t%s' % error)
                # Only the first failure is remembered.
                first_error = first_error or error
            else:
                points += pointsPerCase
                print('\t\t\tOK')

    # A positive 'totalQuestion' makes the question all-or-nothing.
    must_pass_all = int(config['totalQuestion']) > 0
    if must_pass_all and first_error:
        return (0, 'Your code does not pass all tests')
    return (points, '')
def run(config, codeRoot=''):
    """Grade classifier accuracy on held-out test data.

    For every entry of ``config['dataset']`` the classifier is trained on
    the dataset's training files and asked to classify its test files (all
    resolved under ``codeRoot``), using ``get_metric(config['metric'])`` and
    the per-dataset depth ``config['maxdepth'][i]``.  A dataset earns the
    full ``config['pointsPerCase']`` when
    ``is_good_classifier(guesses, testLabels, config['accuracy'][i])`` is
    truthy and half of it otherwise.

    Returns a ``(points, test_error)`` tuple.
    """
    classifier = DecisionTreeClassifier([0, 1])
    print(" ========= %s ========= " % config['header'])
    pointsPerCase = int(config['pointsPerCase'])
    train_args = {'metric': get_metric(config['metric'])}

    points = 0
    # NOTE(review): nothing in the loop below ever assigns first_error, so
    # the all-or-nothing branch at the end cannot trigger.  Possibly
    # intentional (partial credit) -- confirm with the grading policy.
    first_error = ''
    for idx, data in enumerate(config['dataset']):
        print("\tDataset: %s" % data)

        # Training split.
        trainingData = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/training_data.csv" % data))
        trainingLabels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/training_labels.csv" % data))

        # Test split.
        testData = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/test_data.csv" % data))
        testLabels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/test_labels.csv" % data))

        # Train with this dataset's depth limit, then classify the test set.
        train_args['maxdepth'] = int(config['maxdepth'][idx])
        classifier.train(trainingData, trainingLabels, train_args)
        guesses = classifier.classify(testData)

        threshold = float(config['accuracy'][idx])
        if is_good_classifier(guesses, testLabels, threshold):
            print('\t\tOK')
            points += pointsPerCase
        else:
            # Half credit when the accuracy check fails.
            points += pointsPerCase / 2.
            print('\t\tMost frequent classifier is better')

    must_pass_all = int(config['totalQuestion']) > 0
    if must_pass_all and first_error:
        return (0, 'Your code does not pass all tests')
    return (points, '')
def run(config, codeRoot=''):
    """Grade the depth-0 tree: a single leaf carrying the expected label.

    For every entry of ``config['dataset']`` the classifier is trained on
    the dataset's training files (resolved under ``codeRoot``) with
    ``metrics.error`` and ``maxdepth`` 0.  A dataset earns
    ``config['pointsPerCase']`` points when the trained tree has neither a
    ``leftchild`` nor a ``rightchild`` and its ``label`` equals
    ``config['label'][i]``.

    Returns a ``(points, test_error)`` tuple.  When
    ``int(config['totalQuestion']) > 0`` and any check failed, the result is
    ``(0, 'Your code does not pass all tests')``; otherwise ``test_error``
    is the empty string.
    """
    classifier = DecisionTreeClassifier([0, 1])
    print(" ========= %s ========= " % config['header'])
    train_args = {'metric': metrics.error, 'maxdepth': 0}

    points = 0
    first_error = ''
    for idx, data in enumerate(config['dataset']):
        print("\tDataset: %s" % data)

        # Load the training split for this dataset.
        trainingData = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/training_data.csv" % data))
        trainingLabels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/training_labels.csv" % data))

        classifier.train(trainingData, trainingLabels, train_args)
        tree = classifier.tree

        # Any child means the tree grew past a single leaf.
        if tree.leftchild or tree.rightchild:
            error = 'The tree has one or more levels'
            first_error = first_error or error
            print('\t\t%s' % error)
            continue

        label = int(config['label'][idx])
        if tree.label != label:
            error = ('The tree is not classifying by the most frequent '
                     'label: %s' % label)
            first_error = first_error or error
            print('\t\t%s' % error)
            continue

        # 'pointsPerCase' is read only when a case actually passes.
        points += int(config['pointsPerCase'])
        print('\t\tOK')

    # A positive 'totalQuestion' makes the question all-or-nothing.
    must_pass_all = int(config['totalQuestion']) > 0
    if must_pass_all and first_error:
        return (0, 'Your code does not pass all tests')
    return (points, '')
def run(config, codeRoot=''):
    """Grade trained trees against reference trees.

    For every entry of ``config['dataset']`` the classifier is trained on
    the dataset's training files (resolved under ``codeRoot``) with
    ``get_metric(config['metric'])`` and ``int(config['maxdepth'])``.  The
    resulting tree is passed to ``compare`` together with
    ``get_tree(data, codeRoot)``; a status of 0 earns
    ``config['pointsPerCase']`` points, any other status reports the error
    returned by ``compare``.

    Returns a ``(points, test_error)`` tuple.  When
    ``int(config['totalQuestion']) > 0`` and an error was recorded, the
    result is ``(0, 'Your code does not pass all tests')``; otherwise
    ``test_error`` is the empty string.
    """
    classifier = DecisionTreeClassifier([0, 1])
    print(" ========= %s ========= " % config['header'])
    pointsPerCase = int(config['pointsPerCase'])
    train_args = {
        'metric': get_metric(config['metric']),
        'maxdepth': int(config['maxdepth']),
    }

    points = 0
    first_error = ''
    for data in config['dataset']:
        print("\tDataset: %s" % data)

        # Load the training split for this dataset.
        trainingData = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/training_data.csv" % data))
        trainingLabels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/training_labels.csv" % data))

        # Train and compare against the reference tree.
        classifier.train(trainingData, trainingLabels, train_args)
        tree = classifier.tree
        status, error = compare(tree, get_tree(data, codeRoot))

        if status == 0:
            points += pointsPerCase
            print('\t\tOK')
        else:
            # Only the first failure is remembered.
            first_error = first_error or error
            print('\t\t%s' % error)

    # A positive 'totalQuestion' makes the question all-or-nothing.
    must_pass_all = int(config['totalQuestion']) > 0
    if must_pass_all and first_error:
        return (0, 'Your code does not pass all tests')
    return (points, '')
def run(config, codeRoot=''):
    """Grade ``classifier.isLeaf`` on the four configured stop cases.

    Cases 1-4 read ``config['data'][k]``, ``config['label'][k]`` and
    ``config['maxdepth'][k]`` for ``k`` in 0..3.  For each case the
    classifier's ``maxdepth`` attribute is set, then
    ``classifier.isLeaf(data, labels, depth)`` is called with depth 2 for
    the first three cases and 3 for the last.  A truthy result earns
    ``config['pointsPerCase']`` points; a falsy result prints the case's
    error message.

    ``codeRoot`` is accepted but not used by this test.

    Returns a ``(points, test_error)`` tuple.  When
    ``int(config['totalQuestion']) > 0`` and any case failed, the result is
    ``(0, 'Your code does not pass all tests')``; otherwise ``test_error``
    is the empty string.
    """
    classifier = DecisionTreeClassifier([0, 1])
    print(" ========= %s ========= " % config['header'])
    pointsPerCase = int(config['pointsPerCase'])

    # One row per case: (depth handed to isLeaf, message shown on failure).
    # Templates go through str.format so the rows without placeholders can
    # be handled by the same code path as the last one.
    cases = (
        (2, 'WRONG: Check if dataset is empty'),
        (2, 'WRONG: Check if all instances have same label'),
        (2, 'WRONG: Check if all instances are the same'),
        (3, 'WRONG: Check maximum depth parameter. '
            'Maximum: {maximum}, Current: {current}'),
    )

    points = 0
    first_error = ''
    for index, (depth, template) in enumerate(cases):
        # NOTE(security): eval() executes whatever expression the config
        # holds; this is only safe while the config file is trusted.
        data = eval(config['data'][index])
        labels = eval(config['label'][index])
        maxdepth = int(config['maxdepth'][index])
        classifier.maxdepth = maxdepth
        print('\tCase #%d: DATA(%s), LABELS(%s)' % (index + 1, data, labels))

        if classifier.isLeaf(data, labels, depth):
            points += pointsPerCase
            print('\t\tOK')
            continue

        error = template.format(maximum=maxdepth, current=depth)
        # Only the first failure is remembered.
        first_error = first_error or error
        print('\t\t%s' % error)

    # A positive 'totalQuestion' makes the question all-or-nothing.
    must_pass_all = int(config['totalQuestion']) > 0
    if must_pass_all and first_error:
        return (0, 'Your code does not pass all tests')
    return (points, '')
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from decisionTree import DecisionTreeClassifier
from pprint import pprint
import numpy as np


def accuracy(y_true, y_pred):
    """Return the fraction of positions where ``y_true`` equals ``y_pred``."""
    matches = y_true == y_pred
    return np.sum(matches) / len(y_true)


# Load the iris data set and hold out 10% of the samples for testing.
iris = load_iris()
X, y = iris.data, iris.target
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1)

# Fit the project's decision tree and pretty-print whatever fit() returns.
clf = DecisionTreeClassifier(
    max_depth=7,
    feature_names=iris.feature_names,
)
m = clf.fit(X_train, y_train)
pprint(m)

# Score the fitted tree on the held-out samples.
predictions = clf.predict(X_test)
print('Test accuracy:', accuracy(y_test, predictions))