Example #1
0
        result = pool.map(proc, enumerate(train_codes))
        ordered = zip(*sorted(result))
        H_train = ordered[1]
        H_test = ordered[2]
        #print H_train

        # Compare distance of code to codes and classify
        print "Training accuracy:", evaluate(H_train, train_labels, test_codes)
        print "Testing accuracy:", evaluate(H_test, test_labels, codes)
    elif args.m == 'VOTE':
        # Run SMO on each label pair {i=-1, j=+1}
        NUM_LABELS = 10
        m = len(train_data)
        models = {}

        G = gram(train_data)

        # Simple voting, no tie-resolution    
        votes_simple = np.zeros((m, NUM_LABELS))
        for i in range(NUM_LABELS):
            models[i] = {}
            for j in range(i+1, NUM_LABELS):
                print i,j
                model = Model(i, j, G)
                models[i][j] = model
                model.train(train_data, train_labels)
                for k in range(m):
                    v = model.test(k)
                    votes_simple[k][v] += 1
        ties = 0
        h_simple = np.zeros(m)
Example #2
0
data_file = "../data/spambase/spambase.data"
dmat = []
f = open(data_file, "r")
for line in f:
    x = line.split(',')
    x = [float(e) for e in x]
    dmat.append(x)
data = np.array(dmat)

# k-folds xvalidation
k = 10 
kfolder = KFolder(data, k, standard=True, shuffle=True)
for i in range(k-1):
    print "Fold:", i+1
    
    # Get data and labels at fold k
    X,Y = kfolder.testing(i+1)
    
    # Get the testing data
    Xi,Yi = kfolder.testing(i)
    Yi[Yi==0] = -1.0
    
    # Train
    Y[Y==0] = -1.0
    G, Gi = gram(X), tgram(X, Xi)
    a, b = train(X, Y.ravel(), G, C=1e-4, tol=1e-4, eps=1e-3)

    # Test
    print "Training accuracy:", test(Y, Y, G, a, b)
    print "Testing accuracy:", test(Y, Yi, Gi, a, b)