コード例 #1
0
def cross_validate(fold, data):
    """Run k-fold cross-validation and collect per-fold error figures.

    ``data`` is shuffled in place and split into ``fold`` contiguous chunks of
    near-equal size. Each chunk is used exactly once as the held-out test
    set, while a tree is built from the samples of all the other chunks.

    Args:
        fold: Number of folds; must be an integer >= 1.
        data: List of samples. NOTE: the list is shuffled in place, so the
            caller's ordering is not preserved.

    Returns:
        A tuple ``(err, err_prune)`` of two lists with one entry per fold:
        element ``[1]`` of ``test_accuracy(root, test_data)`` and of
        ``test_accuracy(root, test_data, 2)`` respectively (presumably the
        overall error without pruning and with pruning level 2 -- confirm
        against ``test_accuracy``).

    Raises:
        ValueError: If ``fold`` is smaller than 1.
    """
    if fold < 1:
        raise ValueError("fold must be at least 1, got {0!r}".format(fold))

    def make_chunks(seq, num_chunks):
        # Integer boundaries guarantee exactly ``num_chunks`` slices that
        # cover ``seq`` without gaps or overlap. Accumulating a float step
        # instead can drift and produce one chunk too many or too few.
        size = len(seq)
        bounds = [size * k // num_chunks for k in range(num_chunks + 1)]
        return [seq[bounds[k]:bounds[k + 1]] for k in range(num_chunks)]

    random.shuffle(data)
    chunks = make_chunks(data, fold)

    err = []
    err_prune = []
    for i in range(fold):
        test_data = chunks[i]
        # Flatten every chunk except the held-out one into a single list of
        # samples; concatenating the chunk lists themselves would hand the
        # tree builder a list of lists.
        train_data = [
            sample
            for j, chunk in enumerate(chunks)
            if j != i
            for sample in chunk
        ]

        # Train on this fold's training split only, so the held-out chunk
        # never leaks into the tree being evaluated.
        root = gen_tree(train_data)

        err.append(test_accuracy(root, test_data)[1])
        err_prune.append(test_accuracy(root, test_data, 2)[1])

    return (err, err_prune)
コード例 #2
0
def validation_error(test_data, train_data_orig, validate_ratio):
    """Pick a pruning level on a validation split and score it on test data.

    ``train_data_orig`` is shuffled in place. Its first
    ``int(len(train_data_orig) * validate_ratio)`` items become the
    validation set and the remainder is used to build the tree. Every
    pruning level in ``range(max_depth(root))`` is scored on the validation
    set, and the first level with the strictly lowest score is kept.

    Args:
        test_data: Samples used for the final score.
        train_data_orig: List of samples; shuffled in place by this call.
        validate_ratio: Fraction of ``train_data_orig`` held out for
            validation.

    Returns:
        Element ``[1]`` of ``test_accuracy(root, test_data, best_prune)``.
        If ``max_depth(root)`` is 0 no level is scored and ``-1`` is passed
        as the pruning level.
    """
    random.shuffle(train_data_orig)

    # Carve the validation set off the front of the shuffled samples.
    cut = int(len(train_data_orig) * validate_ratio)
    validate_data, train_data = train_data_orig[:cut], train_data_orig[cut:]

    # Tree built from the remaining samples only.
    root = gen_tree(train_data)

    # Scan the pruning levels in increasing order; ties keep the earliest.
    best_prune, least_err = -1, float("inf")
    for level in range(max_depth(root)):
        level_err = test_accuracy(root, validate_data, level)[1]
        if level_err < least_err:
            best_prune, least_err = level, level_err

    # Score the selected pruning level against the test data.
    final_result = test_accuracy(root, test_data, best_prune)
    return final_result[1]
コード例 #3
0
    best_prune = -1
    for i in range(max_depth(root)):
        err = test_accuracy(root, validate_data, i)[1]
        if err < least_err:
            least_err = err
            best_prune = i
    
    # Return err of best pruning against test data
    return test_accuracy(root, test_data, best_prune)[1]
            

            
# Script section: load the data files, build one tree from the training
# file, and print the error figures test_accuracy reports for that tree.
# Both files are read through the same load_training helper.
training_data = load_training("wifi.train")
test_data = load_training("wifi.test")

# Build the tree from the complete training set (no hold-out here).
root = gen_tree(training_data)

# Spacer before the first report (the "\n" plus print's own newline).
print "\n"

# First report: score the tree on the data it was built from.
# test_accuracy's result is unpacked as a pair: a mapping iterated below
# as (location, value) and a single overall value.
err_locs, err_all = test_accuracy(root, training_data)
print "TEST ACCURACY ON TRAINING SET:"
print "\tLOCATION SPECIFIC ERRORS:"
for loc, val in err_locs.iteritems():
    print "\t\t{0} : {1}".format(loc,val)
print "\tOVERALL ERROR:\n\t\t{0}".format(err_all)

# Spacer between the two reports.
print "\n"

# Second report: score the same tree on the separately loaded test file.
err_locs, err_all = test_accuracy(root, test_data)
print "TEST ACCURACY ON TEST SET:"
print "\tLOCATION SPECIFIC ERRORS:"