def run(config, codeRoot=''):
    """Check that a depth-1 decision tree splits on the expected column.

    For every name in ``config['dataset']`` the training data and labels are
    loaded from ``<codeRoot>/data/<name>/``, a ``DecisionTreeClassifier`` is
    trained with ``metrics.error`` and ``maxdepth`` 1, and the resulting
    ``classifier.tree`` is inspected.  A case passes when ``tree.column``
    equals the matching entry of ``config['best_split']`` and
    ``has_one_level(tree)`` is true.  Each passing case adds
    ``config['pointsPerCase']`` points.

    Args:
        config: Mapping providing ``header``, ``pointsPerCase``, ``dataset``,
            ``best_split`` and ``totalQuestion``.
        codeRoot: Directory that contains the ``data`` folder.

    Returns:
        A ``(points, test_error)`` tuple.  When ``config['totalQuestion']`` is
        positive and at least one case failed, this is
        ``(0, 'Your code does not pass all tests')``; otherwise ``test_error``
        is the empty string.
    """
    classifier = DecisionTreeClassifier([0, 1])
    print(" ========= %s ========= " % config['header'])
    case_value = int(config['pointsPerCase'])
    train_args = {'metric': metrics.error, 'maxdepth': 1}

    failures = []

    def report(message):
        # Echo the problem and remember it for the final all-or-nothing check.
        print('\t\t%s' % message)
        failures.append(message)

    score = 0
    for index, name in enumerate(config['dataset']):
        print("\tDataset: %s" % name)

        # Load data
        features = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/training_data.csv" % name))
        labels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/training_labels.csv" % name))

        # Conduct training and testing
        classifier.train(features, labels, train_args)
        tree = classifier.tree
        expected_column = int(config['best_split'][index])

        failures_before = len(failures)
        if tree.column != expected_column:
            report('WRONG SPLIT: Correct: %s , Returned: %s'
                   % (expected_column, tree.column))
        if not has_one_level(tree):
            report('The tree does not have one level')

        # Nothing new was reported for this dataset, so the case passed.
        if len(failures) == failures_before:
            score += case_value
            print('\t\tOK')

    all_cases_required = int(config['totalQuestion']) > 0
    if all_cases_required and failures:
        return (0, 'Your code does not pass all tests')
    return (score, '')
# Example no. 2
def run(config, codeRoot=''):
    """Train on each dataset and check the accuracy on its test split.

    For every name in ``config['dataset']`` the training and test files are
    loaded from ``<codeRoot>/data/<name>/``.  A ``DecisionTreeClassifier`` is
    trained with the metric returned by ``get_metric(config['metric'])`` and
    the per-dataset ``config['maxdepth'][i]``, then used to classify the test
    data.  When ``is_good_classifier`` accepts the guesses against the
    per-dataset ``config['accuracy'][i]`` threshold the case earns
    ``config['pointsPerCase']`` points; otherwise it earns half of that.

    Args:
        config: Mapping providing ``header``, ``pointsPerCase``, ``metric``,
            ``dataset``, ``maxdepth``, ``accuracy`` and ``totalQuestion``.
        codeRoot: Directory that contains the ``data`` folder.

    Returns:
        A ``(points, test_error)`` tuple.
    """
    classifier = DecisionTreeClassifier([0, 1])
    print(" ========= %s ========= " % config['header'])
    case_value = int(config['pointsPerCase'])
    train_args = {'metric': get_metric(config['metric'])}

    score = 0
    # NOTE(review): nothing below ever assigns a non-empty value to
    # first_error, so the "does not pass all tests" branch at the end cannot
    # trigger -- confirm whether failing cases were meant to record an error.
    first_error = ''
    for index, name in enumerate(config['dataset']):
        print("\tDataset: %s" % name)

        # Load data
        train_features = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/training_data.csv" % name))
        train_labels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/training_labels.csv" % name))

        # Load test
        test_features = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/test_data.csv" % name))
        test_labels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/test_labels.csv" % name))

        # Conduct training and testing
        train_args['maxdepth'] = int(config['maxdepth'][index])
        classifier.train(train_features, train_labels, train_args)
        predictions = classifier.classify(test_features)
        threshold = float(config['accuracy'][index])

        if not is_good_classifier(predictions, test_labels, threshold):
            # Failing the accuracy threshold still earns half credit.
            score += (case_value / 2.)
            print('\t\tMost frequent classifier is better')
            continue

        print('\t\tOK')
        score += case_value

    all_cases_required = int(config['totalQuestion']) > 0
    if all_cases_required and first_error:
        return (0, 'Your code does not pass all tests')
    return (score, '')
def run(config, codeRoot=''):
    """Check that a depth-0 tree is a single leaf with the expected label.

    For every name in ``config['dataset']`` the training data and labels are
    loaded from ``<codeRoot>/data/<name>/`` and a ``DecisionTreeClassifier``
    is trained with ``metrics.error`` and ``maxdepth`` 0.  A case passes when
    ``classifier.tree`` has neither a ``leftchild`` nor a ``rightchild`` and
    its ``label`` equals the matching entry of ``config['label']``.  Each
    passing case adds ``config['pointsPerCase']`` points.

    Args:
        config: Mapping providing ``header``, ``dataset``, ``label``,
            ``pointsPerCase`` and ``totalQuestion``.
        codeRoot: Directory that contains the ``data`` folder.

    Returns:
        A ``(points, test_error)`` tuple.  When ``config['totalQuestion']`` is
        positive and at least one case failed, this is
        ``(0, 'Your code does not pass all tests')``; otherwise ``test_error``
        is the empty string.
    """
    classifier = DecisionTreeClassifier([0, 1])
    print(" ========= %s ========= " % config['header'])

    train_args = {'metric': metrics.error, 'maxdepth': 0}

    failures = []

    def report(message):
        # Echo the problem and remember it for the final all-or-nothing check.
        print('\t\t%s' % message)
        failures.append(message)

    score = 0
    for index, name in enumerate(config['dataset']):
        print("\tDataset: %s" % name)

        # Load data
        features = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/training_data.csv" % name))
        labels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/training_labels.csv" % name))

        # Conduct training and testing
        classifier.train(features, labels, train_args)
        tree = classifier.tree

        # Any child means the tree grew beyond a single leaf.
        if tree.leftchild or tree.rightchild:
            report('The tree has one or more levels')
            continue

        expected_label = int(config['label'][index])
        if tree.label != expected_label:
            report('The tree is not classifying by the most frequent label: %s'
                   % expected_label)
            continue

        score += int(config['pointsPerCase'])
        print('\t\tOK')

    all_cases_required = int(config['totalQuestion']) > 0
    if all_cases_required and failures:
        return (0, 'Your code does not pass all tests')
    return (score, '')
def run(config, codeRoot=''):
    """Compare the trained tree for each dataset against a reference tree.

    For every name in ``config['dataset']`` the training data and labels are
    loaded from ``<codeRoot>/data/<name>/`` and a ``DecisionTreeClassifier``
    is trained with the metric returned by ``get_metric(config['metric'])``
    and ``maxdepth`` set to ``config['maxdepth']``.  The resulting
    ``classifier.tree`` is passed to ``compare`` together with
    ``get_tree(name, codeRoot)``.  A status of 0 counts as a pass and adds
    ``config['pointsPerCase']`` points; any other status prints the error
    that ``compare`` returned.

    Args:
        config: Mapping providing ``header``, ``pointsPerCase``, ``metric``,
            ``maxdepth``, ``dataset`` and ``totalQuestion``.
        codeRoot: Directory that contains the ``data`` folder; it is also
            forwarded to ``get_tree``.

    Returns:
        A ``(points, test_error)`` tuple.  When ``config['totalQuestion']`` is
        positive and a failing case produced a non-empty error, this is
        ``(0, 'Your code does not pass all tests')``; otherwise ``test_error``
        is the empty string.
    """
    classifier = DecisionTreeClassifier([0, 1])
    print(" ========= %s ========= " % config['header'])
    case_value = int(config['pointsPerCase'])

    train_args = {}
    train_args['metric'] = get_metric(config['metric'])
    train_args['maxdepth'] = int(config['maxdepth'])

    score = 0
    first_error = ''
    for name in config['dataset']:
        print("\tDataset: %s" % name)

        # Load data
        features = samples.loadDataFile(
            os.path.join(codeRoot, "data/%s/training_data.csv" % name))
        labels = samples.loadLabelsFile(
            os.path.join(codeRoot, "data/%s/training_labels.csv" % name))

        # Conduct training and testing
        classifier.train(features, labels, train_args)
        tree = classifier.tree
        status, error = compare(tree, get_tree(name, codeRoot))

        if status == 0:
            score += case_value
            print('\t\tOK')
            continue

        # Only the first failure is remembered; later ones are just printed.
        first_error = first_error or error
        print('\t\t%s' % error)

    all_cases_required = int(config['totalQuestion']) > 0
    if all_cases_required and first_error:
        return (0, 'Your code does not pass all tests')
    return (score, '')