# Script fragment (line breaks were lost): encode train/test instances against a
# shared set of anchors, train a cafeMap model, then collect per-test-instance
# predictions and local weights.
# NOTE(review): as written the whole line is a single comment because it starts
# with '#'. It is unclear which of the leading kmeans/anchors statements were
# commented out in the original layout -- confirm before restoring line breaks.
# Visible steps, in order:
#   * one Instance per row of test_data, with feature_vector set to that row;
#   * createInstances(data, labels) for the training side;
#   * compute_gammas(...) on both sets with K=anchors, gamma=1.0;
#   * cafeMap(T=50000, beta=0.1, Lambda=0.1) trained with the same K/gamma;
#   * per test instance: c.predict_instance(t), and the first two components of
#     c.W.dot(t.gammas) appended to l_w1 / l_w2;
#   * l_w1 is finally replaced by the array of its absolute values.
# local_bias is computed but not used in the visible part.
# anchor_pos = kmeans(X1, 5) anchor_neg = kmeans(X2, 5) anchors = np.vstack((anchor_pos[0], anchor_neg[0])) test_ins = [Instance() for i in range(len(test_data))] for b in range(len(test_ins)): test_ins[b].feature_vector = test_data[b] instances = createInstances(data, labels) ########################################################### compute_gammas(instances, K=anchors, gamma=1.0) compute_gammas(test_ins, K=anchors, gamma=1.0) ######################################################### c = cafeMap(T=50000, beta=0.1, Lambda=0.1) c.train(instances, K=anchors, gamma=1.0) plt.figure() ########################Testing ########################## predictions = [] l_w1 = [] l_w2 = [] for t in test_ins: predictions += [c.predict_instance(t)] local_weight = c.W.dot(t.gammas) local_bias = c.bias.dot(t.gammas) l_w1 += [local_weight[0]] l_w2 += [local_weight[1]] l_w1 = np.array(np.absolute(l_w1))
# Script fragment (line breaks were lost): train cafeMap on (X, Y), score the
# validation set (Xv, Yv), then sweep every score as a decision threshold.
# The fragment starts mid-script (Xtemp is defined outside the visible part) and
# ends on an `if a > amax:` whose body is not visible.
# Visible steps, in order:
#   * M and S (mean, std + 1e-7 of Xtemp) are computed but not used here;
#   * every row of X and Xv is divided by its norm (np.linalg.norm, axis=1);
#     NOTE(review): an all-zero row would divide by zero -- confirm inputs;
#   * instances are built for both sets and compute_gammas is run on I + Iv
#     (presumably list concatenation -- confirm) with K=50, gamma=1e-3;
#   * the classifier is trained on I and tested on Iv;
#   * scores and Yv are reordered by ascending score (aidx);
#   * for each threshold s, `a` is the mean of the accuracy on the pidx subset
#     and the accuracy on the nidx subset, predicting 2 * (score > s) - 1.
# NOTE(review): pidx / nidx are built from Yv *before* scores and Yv are
# reordered by aidx, but are then used to index the reordered arrays, so the
# masks appear to refer to the pre-sort ordering -- verify this is intended.
Ytemp = np.append(Y, Yv) M = np.mean(Xtemp, axis=0) S = np.std(Xtemp, axis=0) + 1e-7 N, d = X.shape X = (X.T / np.linalg.norm(X, axis=1)).T Xv = (Xv.T / np.linalg.norm(Xv, axis=1)).T I = createInstances(X, Y) Iv = createInstances(Xv, Yv) llc = compute_gammas(I + Iv, K=50, gamma=1e-3) classifier = cafeMap(Lambda=1e-2, T=20e3, no_bias=False, encoder=None, c_arg=True) classifier.train(I, history=500) scores = np.array(classifier.test(Iv)) aidx = np.argsort(scores) pidx = Yv == 1 nidx = Yv != 1 scores = scores[aidx] Yv = Yv[aidx] amax = 0 for s in scores: a = np.mean((2 * (scores[pidx] > s) - 1) == Yv[pidx]) a += np.mean((2 * (scores[nidx] > s) - 1) == Yv[nidx]) a /= 2 if a > amax:
if __name__ == '__main__':
    # Example run: k-fold cross-validation of a cafeMap classifier on the
    # preprocessed prostate data set.

    # '/' is used as the separator so the relative path also resolves on
    # non-Windows systems (the previous literal hard-coded a backslash).
    fname = 'data/prostate_preprocessed.txt'

    # 'tumor' will be considered as the +1 label.
    features, labels, genes = readData(fname, 'tumor')

    # Data points of type Instance, as needed by cafeMap.
    instances = createInstances(features, labels)

    # Locally linear coding (LLC):
    #   K     = number of anchor points in LLC
    #   k     = number of non-zero coefficients
    #   gamma = hyper-parameter (> 0) for LLC to enforce sparsity and locality
    compute_gammas(instances, K=10, k=10, gamma=0.1)

    #   T      = number of iterations
    #   Lambda = regularization parameter (default 1e-3)
    #   beta   = beta parameter of the coordinate descent algorithm
    #            (default 0.25)
    # No train() call is made on `c` before it is handed to kFoldCV below.
    c = cafeMap(T=1000, Lambda=0.00001, beta=0.1)

    # k-fold cross-validation of classifier `c`; 10 folds by default.
    # Alternatives:
    #   result, folds = kFoldCV(c, instances, folds=5)     -> 5-fold CV
    #   result, folds = kFoldCV(c, instances, parallel=4)  -> parallel is the
    #       number of CPU cores to use; the parallel implementation requires
    #       "joblib".
    result, folds = kFoldCV(c, instances)

    # Every entry of `result` unpacks into three parts.
    # NOTE: this rebinds `labels`, replacing the array returned by readData.
    scores, labels, classifiers = zip(*result)
    perFoldAuc, perFoldAcc, perFoldBestAcc, perFoldThresh = perFoldAUC(
        scores, labels)
# Fragment (line breaks were lost): tail of a data-reading function whose start
# is not visible (presumably the readData called below -- confirm), followed by
# a script entry point.
# Function tail: converts the fields after the first one to floats, collects
# them per record together with the first field (stored in `genes`), and returns
# (transposed feature array, label array, genes).
# NOTE(review): np.float was only an alias of the builtin float; it was
# deprecated in NumPy 1.20 and removed in NumPy 1.24 -- use float(f) when this
# code is next edited.
# Entry point, visible steps in order:
#   * read the prostate data with 'tumor' as the label argument;
#   * divide every row of X by its norm (np.linalg.norm, axis=1);
#   * build instances, run compute_gammas(K=10, k=10), and cross-validate
#     cafeMap(Lambda=1e-3, T=1e2, no_bias=True) with parallel=1;
#   * sum localWb(instances) over all classifiers returned by the folds;
#     NOTE(review): the loop variable `c` rebinds the name that held the
#     cafeMap object created above;
#   * Wb0 keeps the unscaled sum (Wb * 1, presumably to force a copy -- confirm)
#     and Wb is scaled by 100;
#   * the first `idx` assignment is immediately overwritten by the second, which
#     selects the 40 rows with the largest summed absolute value;
#   * Wbr keeps those rows, restricted to the columns where Y == -1;
#   * a KMeans model with 5 clusters is constructed (not fitted in the visible
#     part).
for f in ln[1:]: vector += [np.float(f)] feat_vecs += [vector] genes += [ln[0]] return np.array(feat_vecs).T, np.array(labels), genes if __name__ == '__main__': fname = 'data/prostate_preprocessed.txt' X, Y, genes = readData(fname, 'tumor') X = (X.T / np.linalg.norm(X, axis=1)).T instances = createInstances(X, Y) llc = compute_gammas(instances, K=10, k=10) c = cafeMap(Lambda=1e-3, T=1e2, no_bias=True) result, folds = c.kFoldCV(instances, parallel=1) scores, labels, classifiers = zip(*result) classifier = classifiers[0] Wb = classifiers[0].localWb(instances) #[:-1] for c in classifiers[1:]: Wb += c.localWb(instances) #[:-1] from sklearn.cluster import KMeans Wb0 = Wb * 1 Wb = 100 * Wb idx = np.sum(np.abs(Wb) > 1e-6, axis=1) > 0 idx = np.argsort(np.sum(np.abs(Wb), axis=1))[-40:] Wbr = Wb[idx, :] Wbr = Wbr[:, Y == -1] model = KMeans(init='k-means++', n_clusters=5)
# Wrap every test vector in an Instance carrying it as feature_vector.
test_data = np.array(test_data)
test_ins = [Instance() for _ in test_data]
for holder, vec in zip(test_ins, test_data):
    holder.feature_vector = vec

# Anchors come from k-means over the stacked training and test data.
# NOTE(review): anchors[0] is what gets passed on as K below; presumably kmeans
# returns a tuple whose first element holds the centroids -- confirm.
all_data = np.vstack((data, test_data))
anchors = kmeans(all_data, 4)
instances = createInstances(data, labels)

# ---------------------------------------------------------------------------
# Encode both sets against the same anchors.
compute_gammas(instances, K=anchors[0], k=2, gamma=1.0)
compute_gammas(test_ins, K=anchors[0], k=2, gamma=1.0)
# ---------------------------------------------------------------------------

c = cafeMap(T=10000, beta=10.0, Lambda=0.1)
c.train(instances)
plt.figure()

# ------------------------------- Testing -----------------------------------
predictions = []
l_w1 = []
l_w2 = []
for t in test_ins:
    predictions.append(c.predict_instance(t))
    # Weights and bias combined with this instance's gammas.
    local_weight = c.W.dot(t.gammas)
    local_bias = c.bias.dot(t.gammas)  # evaluated but not used in this part
    l_w1.append(local_weight[0])
    l_w2.append(local_weight[1])

# Only the magnitude of the first weight component is kept.
l_w1 = np.array(np.absolute(l_w1))
from circle import getCircle

# Build a two-class data set from getCircle and perturb it with noise.
Xp, Xn = getCircle(N)
X = np.vstack((Xp, Xn))
d = X.shape[1]

# Noise drawn uniformly from [-nu, nu) per coordinate; the (2 * N, d) shape
# assumes X has 2 * N rows -- TODO confirm against getCircle.
Nu = nu * (2 * np.random.rand(2 * N, d) - 1)

# Mean ratio of noise norm to data norm per row, in percent, printed under the
# label "NSR". Written as a single formatted string so the output is the same
# whether `print` is a statement (Python 2) or a function (Python 3).
print("NSR %s" % np.mean(
    100 * np.linalg.norm(Nu, axis=1) / np.linalg.norm(X, axis=1)))

X += Nu
# First N rows are labelled +1, the remaining N rows -1.
Y = np.array([1] * N + [-1] * N)
instances = createInstances(X, Y)

classifier = cafeMap(Lambda=1e-1, T=5e3, no_bias=False, encoder='llc', K=5,
                     gamma=1e-3)

# 5-fold, shuffled cross-validation with parallel=4.
result, folds = classifier.kFoldCV(instances, folds=5, shuffle=True,
                                   history=100, parallel=4)
scores, labels, classifiers = zip(*result)

# localWb output averaged over the fold classifiers; the last entry along the
# first remaining axis is dropped (presumably the bias term -- confirm).
Wb = np.array([c.localWb(instances) for c in classifiers])
W = np.mean(Wb, axis=0)[:-1]

# Generate the vertically averaged ROC curve. The (scores, labels) pairs are
# materialised as a list so roc_VA receives a sequence on Python 3 as well.
fpr, tpr, auc = roc_VA(list(zip(scores, labels)))
plt.figure()
plt.plot(fpr, tpr)
plt.xlabel('FPR')
# Fragment (line breaks were lost): tail of a data-reading function whose start
# is not visible (presumably the readData called below -- confirm), followed by
# a script entry point that cross-validates cafeMap on two data sets.
# NOTE(review): np.float was only an alias of the builtin float; it was
# deprecated in NumPy 1.20 and removed in NumPy 1.24 -- use float(f) when this
# code is next edited.
# NOTE(review): the print lines are Python 2 print statements and are a
# SyntaxError on Python 3.
# Lymphoma section, visible steps in order:
#   * read 'data/dlbcl_preprocessed.txt' with '1' as the label argument;
#   * build instances and run compute_gammas(K=10, k=10, gamma=0.1);
#   * cross-validate cafeMap(T=100000, Lambda=0.0001, beta=0.1) with parallel=4;
#   * `labels` is rebound by the zip(*result) unpacking;
#   * perFoldAUC results are averaged and printed (AUC, accuracy, best-threshold
#     accuracy).
# Breast-cancer section: same sequence with 'data/breast_preprocessed.txt',
# label argument 'luminal', gamma=10.0 and Lambda=0.01.
# NOTE(review): a lone '#' precedes `fname='data/breast_preprocessed.txt'`, so
# it is unclear whether that assignment (or the whole section) was commented out
# in the original layout -- confirm before restoring line breaks.
vector+=[np.float(f)] feat_vecs+=[vector] genes+=[ln[0]] return np.array(feat_vecs).T, np.array(labels), genes if __name__ == '__main__': #========================lymphoma==================================# fname='data/dlbcl_preprocessed.txt' features,labels, genes=readData(fname, '1') instances=createInstances(features, labels) compute_gammas(instances, K=10, k=10, gamma=0.1) c=cafeMap(T=100000, Lambda=0.0001, beta=0.1) result, folds= c.kFoldCV(instances, parallel=4) scores,labels,classifiers = zip(*result) perFoldAuc, perFoldAcc, perFoldBestAcc, perFoldThresh= perFoldAUC(scores, labels) print "The AVG AUC for 10 folds(Lymphoma)=", np.mean(perFoldAuc) print "The AVG Accuracy (zero threshold) for 10 folds(Lymphoma)=", np.mean(perFoldAcc) print "The AVG Accuracy for 10 folds(Lymphoma best threshold)=", np.mean(perFoldBestAcc) # (best threshold) #===============================breast cancer=====================# # fname='data/breast_preprocessed.txt' features,labels, genes=readData(fname, 'luminal') instances=createInstances(features, labels) compute_gammas(instances, K=10, k=10, gamma=10.0) c=cafeMap( T=100000, Lambda=0.01, beta=0.1) result, folds= c.kFoldCV(instances, parallel=4)
# Two classes built from one template vector: positives repeat x, negatives
# repeat x reversed, N rows each.
Xp = np.repeat(np.atleast_2d(x), N, axis=0)
Xn = np.repeat(np.atleast_2d(x[::-1]), N, axis=0)
from circle import getCircle  # not used in this part of the script

X = np.vstack((Xp, Xn))
d = X.shape[1]

# Noise drawn uniformly from [-nu, nu) per coordinate, one row per sample.
Nu = nu * (2 * np.random.rand(2 * N, d) - 1)

# Mean ratio of noise norm to data norm per row, in percent, printed under the
# label "NSR". Written as a single formatted string so the output is the same
# whether `print` is a statement (Python 2) or a function (Python 3).
print("NSR %s" % np.mean(
    100 * np.linalg.norm(Nu, axis=1) / np.linalg.norm(X, axis=1)))

X += Nu
# First N rows are labelled +1, the remaining N rows -1.
Y = np.array([1] * N + [-1] * N)
instances = createInstances(X, Y)

classifier = cafeMap(Lambda=1e-1, T=1e3, no_bias=False)

# 5-fold, shuffled cross-validation; no `parallel` argument is passed here.
result, folds = classifier.kFoldCV(instances, K=5, gamma=1e-3, folds=5,
                                   shuffle=True, history=100)
scores, labels, classifiers = zip(*result)

# localWb output averaged over the fold classifiers; the last entry along the
# first remaining axis is dropped (presumably the bias term -- confirm).
Wb = np.array([c.localWb(instances) for c in classifiers])
W = np.mean(Wb, axis=0)[:-1]

# The (scores, labels) pairs are materialised as a list so roc_VA receives a
# sequence on Python 3 as well.
fpr, tpr, auc = roc_VA(list(zip(scores, labels)))

# ROC curve on the unit square, titled with the value returned as `auc`.
plt.figure()
plt.plot(fpr, tpr)
plt.xlabel('FPR')
plt.ylabel('TPR')
plt.axis([0, 1, 0, 1])
plt.grid()
plt.title(str(auc))