result = pool.map(proc, enumerate(train_codes)) ordered = zip(*sorted(result)) H_train = ordered[1] H_test = ordered[2] #print H_train # Compare distance of code to codes and classify print "Training accuracy:", evaluate(H_train, train_labels, test_codes) print "Testing accuracy:", evaluate(H_test, test_labels, codes) elif args.m == 'VOTE': # Run SMO on each label pair {i=-1, j=+1} NUM_LABELS = 10 m = len(train_data) models = {} G = gram(train_data) # Simple voting, no tie-resolution votes_simple = np.zeros((m, NUM_LABELS)) for i in range(NUM_LABELS): models[i] = {} for j in range(i+1, NUM_LABELS): print i,j model = Model(i, j, G) models[i][j] = model model.train(train_data, train_labels) for k in range(m): v = model.test(k) votes_simple[k][v] += 1 ties = 0 h_simple = np.zeros(m)
# Driver script: load the spambase data set, split it into k folds and, for
# each pair of neighbouring folds, fit the SVM via train() on one fold and
# report accuracy on that fold and on the other one.
data_file = "../data/spambase/spambase.data"

# Parse the comma-separated file into a list of float rows.  The context
# manager guarantees the handle is closed even if a row fails to parse.
dmat = []
with open(data_file, "r") as f:
    for line in f:
        if not line.strip():
            # Tolerate blank / trailing empty lines instead of crashing on
            # float('').
            continue
        dmat.append([float(e) for e in line.split(',')])
data = np.array(dmat)

# k-folds xvalidation
k = 10
kfolder = KFolder(data, k, standard=True, shuffle=True)

# Only k-1 iterations because every iteration uses both fold i and fold i+1.
# NOTE(review): the model is fitted on kfolder.testing(i+1) -- a single fold --
# rather than on the remaining k-1 folds; confirm this is intentional.
for i in range(k - 1):
    # Single-argument print(...) emits the same text as the original
    # `print "Fold:", i+1` statement under Python 2 and also parses on Python 3.
    print("Fold: %s" % (i + 1,))

    # Data and labels used for fitting (fold i+1).
    X, Y = kfolder.testing(i + 1)

    # Data and labels used for evaluation (fold i).
    Xi, Yi = kfolder.testing(i)

    # Remap label 0 to -1 in place so that labels are {-1, +1}.
    Yi[Yi == 0] = -1.0
    Y[Y == 0] = -1.0

    # Train: G is built from X alone, Gi from X together with Xi.
    G, Gi = gram(X), tgram(X, Xi)
    a, b = train(X, Y.ravel(), G, C=1e-4, tol=1e-4, eps=1e-3)

    # Test
    print("Training accuracy: %s" % (test(Y, Y, G, a, b),))
    print("Testing accuracy: %s" % (test(Y, Yi, Gi, a, b),))