def test_kernelcenterer_vs_sklearn():
    """Check that two kernel-centerer implementations give the same output.

    ``KernelCentererR`` is fitted on every trajectory concatenated into one
    array and applied to the first trajectory alone; ``KernelCenterer`` is
    fitted on, and applied to, the list of trajectories.  The centered first
    trajectory must match between the two (to the default precision of
    ``assert_array_almost_equal``).
    """
    # Compare msmbuilder.preprocessing.KernelCenterer
    # with sklearn.preprocessing.KernelCenterer

    # Reference path: single stacked array in, single array out.
    reference = KernelCentererR()
    reference.fit(np.concatenate(trajs))
    expected = reference.transform(trajs[0])

    # Path under test: list of trajectories in, list out.
    candidate = KernelCenterer()
    candidate.fit(trajs)
    actual = candidate.transform(trajs)[0]

    np.testing.assert_array_almost_equal(expected, actual)
def cv_mkl(kernel_list, labels, mkl, n_folds, dataset, data):
    """Learn kernel-combination weights on every CV fold and save them.

    For each fold ``i`` in ``1..n_folds`` the samples whose tag differs from
    ``i`` form the training set.  Every kernel matrix is restricted to those
    samples, centered with a freshly fitted ``KernelCenterer``, and handed to
    the algorithm selected by ``mkl``; the result is written with
    ``np.savetxt``.

    Parameters
    ----------
    kernel_list : sequence of 2-D arrays
        Kernel matrices, indexed by sample on both axes.
    labels : 2-D array
        Label matrix, one row per sample and one column per label.
    mkl : str
        ``'UNIMKL'``, ``'ALIGNF2'``, ``'TSMKL'`` or any string containing
        ``'ALIGNF2SOFT'``.  Any other value only centers the kernels and
        writes nothing.
    n_folds : int
        Number of folds; fold ids run from 1 to ``n_folds`` inclusive.
    dataset : str
        Prefix of the output file names.
    data : str
        Name of the tag file ``../data/cv/<data>.cv``; also forwarded to
        ``alignf2`` and ``ALIGNF2SOFT``.

    Returns
    -------
    None
        Weights go to ``../svm_result/weights/`` and, for ALIGNF2SOFT, the
        selected parameter goes to ``../svm_result/upperbound/``.
    """
    # The unpacking doubles as a check that ``labels`` is two-dimensional.
    _, n_labels = labels.shape
    n_km = len(kernel_list)
    tags = np.loadtxt("../data/cv/" + data + ".cv")

    for i in range(1, n_folds + 1):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Test fold %d" % i)
        # ``%`` binds tighter than ``+``: only the trailing literal is formatted.
        res_f = "../svm_result/weights/" + dataset + "_fold_%d_%s.weights" % (i, mkl)
        para_f = "../svm_result/upperbound/" + dataset + "_fold_%d_%s.ubound" % (i, mkl)

        train = ~np.array(tags == i)
        train_y = labels[train, :]

        # all train kernels are normalized and centered
        train_km_list = []
        for km in kernel_list:
            centerer = KernelCenterer()
            train_km = km[np.ix_(train, train)]
            centerer.fit(train_km)
            train_km_list.append(centerer.transform(train_km))

        # The four conditions are mutually exclusive, so a chain is equivalent
        # to the independent tests it replaces.
        if mkl == 'UNIMKL':
            res = UNIMKL(train_km_list, train_y)
            np.savetxt(res_f, res)
        elif mkl == 'ALIGNF2':
            res = alignf2(train_km_list, train_y, data)
            np.savetxt(res_f, res)
        elif 'ALIGNF2SOFT' in mkl:
            bestC, res = ALIGNF2SOFT(train_km_list, train_y, i, tags, data)
            np.savetxt(res_f, res)
            np.savetxt(para_f, bestC)
        elif mkl == "TSMKL":
            # One weight vector per label, stored column-wise.
            W = np.zeros((n_km, n_labels))
            for j in range(n_labels):
                print("..label %s" % j)
                W[:, j] = TSMKL(train_km_list, train_y[:, j])
            np.savetxt(res_f, W)
class kc():
    """Kernel-centering step applied to selected columns of a table.

    The wrapper computes a pairwise kernel matrix over ``data[cols]``, centers
    it with a ``KernelCenterer`` and writes the leading columns of the centered
    matrix back into ``data`` under the same column names.

    Parameters
    ----------
    cols : sequence
        Column labels selecting the features (used as ``data[cols]``).
    metric : str or callable
        Forwarded to ``pairwise_kernels`` as ``metric``.

    Notes
    -----
    ``data`` is modified in place and also returned.  Presumably ``data`` is a
    pandas DataFrame -- confirm against the callers.  Writing back needs the
    centered matrix to have at least ``len(cols)`` columns, i.e. at least that
    many fitted samples; otherwise indexing raises ``IndexError``.
    """

    def __init__(self, cols, metric):
        self.columns = cols
        self.metric = metric
        self.model = KernelCenterer()
        # Features seen by the most recent fit.  transform() needs them to
        # build the (new samples x fitted samples) kernel that a fitted
        # KernelCenterer expects.
        self._fit_features = None

    def _fit_kernel(self, data):
        """Return the training kernel and remember the training features."""
        import numpy as np  # local import: file-level imports are not visible here

        features = data[self.columns]
        # Keep an independent copy; fit_transform overwrites these columns.
        self._fit_features = np.array(features)
        return pairwise_kernels(features, metric=self.metric)

    def _write_back(self, data, transformed):
        """Store column ``idx`` of ``transformed`` under ``self.columns[idx]``."""
        for idx, column in enumerate(self.columns):
            data[column] = transformed[:, idx]
        return data

    def fit(self, data):
        """Fit the centerer on the kernel matrix of ``data[cols]``."""
        self.model.fit(self._fit_kernel(data))

    def fit_transform(self, data):
        """Fit on ``data[cols]``, then overwrite those columns with centered values."""
        transformed = self.model.fit_transform(self._fit_kernel(data))
        return self._write_back(data, transformed)

    def transform(self, data):
        """Center the kernel between ``data[cols]`` and the fitted samples.

        The kernel is computed against the features stored at fit time, so
        ``data`` may hold a different number of rows than the fitting set.
        When nothing was stored (the model was fitted some other way) the
        kernel of ``data[cols]`` with itself is used, as before.
        """
        # getattr keeps instances created before this attribute existed working.
        fitted = getattr(self, "_fit_features", None)
        k = pairwise_kernels(data[self.columns], fitted, metric=self.metric)
        transformed = self.model.transform(k)
        return self._write_back(data, transformed)
def ovkr_mkl(kernel_list, labels, mkl, n_folds, dataset, data):
    """Cross-validated OVKR predictions on noise-perturbed labels.

    For each of five fixed noise levels the labels are perturbed with
    ``addNoise``.  Then, for every fold ``i`` in ``1..n_folds``, each kernel is
    split into train/train and test/train parts, centered with a
    ``KernelCenterer`` fitted on the training part, and combined with the
    first column of the weight matrix.  ``OVKR_train_CV`` is run on the
    combined training kernel and ``OVKR_test`` fills the predictions for the
    held-out samples.

    Parameters
    ----------
    kernel_list : sequence of 2-D arrays
        Kernel matrices, indexed by sample on both axes.
    labels : 2-D array
        Label matrix, one row per sample and one column per label.
    mkl : str
        ``'UNIMKL'`` obtains weights from ``UNIMKL(n_km, n_labels)``; any
        other value loads them from
        ``../ovkr_result/noisy_weights/<dataset>_fold_<i>_<mkl>_noise_<nid>.weights``.
    n_folds : int
        Number of folds; fold ids run from 1 to ``n_folds`` inclusive.
    dataset : str
        Prefix of the weight file names.
    data : str
        Name of the tag file ``../data/cv/<data>.cv`` and part of the output
        file name.

    Returns
    -------
    None
        One prediction matrix per noise level is saved with ``np.save`` to
        ``../ovkr_result/noisy_pred/``.
    """
    n_sample, n_labels = labels.shape
    n_km = len(kernel_list)
    tags = np.loadtxt("../data/cv/" + data + ".cv")

    # Add noise to the output
    noise_level = [0.005, 0.010, 0.015, 0.020, 0.025]
    for nid, noi in enumerate(noise_level):
        # Same text as the Python 2 statement ``print "noise", noi, nid``.
        print("noise %s %s" % (noi, nid))
        Y = addNoise(labels, noi)
        pred = np.zeros((n_sample, n_labels))

        # Run for each fold
        for i in range(1, n_folds + 1):
            print("Test fold %d" % i)
            res_f = "../ovkr_result/noisy_weights/" + dataset + "_fold_%d_%s_noise_%d.weights" % (i, mkl, nid)

            # divide data
            test = np.array(tags == i)
            train = np.array(~test)
            train_y = Y[train, :]
            n_train = len(train_y)

            train_km_list = []
            test_km_list = []
            for km in kernel_list:
                # The centerer is fitted on training rows only and then
                # applied to both the train and the test/train kernels.
                centerer = KernelCenterer()
                train_km = km[np.ix_(train, train)]
                test_km = km[np.ix_(test, train)]
                centerer.fit(train_km)
                train_km_list.append(centerer.transform(train_km))
                test_km_list.append(centerer.transform(test_km))

            if mkl == 'UNIMKL':
                wei = UNIMKL(n_km, n_labels)
            else:
                wei = np.loadtxt(res_f, ndmin=2)

            # The norm is taken over the whole matrix, but only column 0 is
            # rescaled and used below.
            normw = np.linalg.norm(wei)
            if normw == 0:
                # All-zero weights: fall back to unit-norm uniform weights.
                wei[:, 0] = np.ones(n_km) / np.linalg.norm(np.ones(n_km))
            else:
                wei[:, 0] = wei[:, 0] / normw

            # combine train and test kernel using learned weights
            train_ckm = np.zeros((n_train, n_train))
            test_ckm = np.zeros(test_km_list[0].shape)
            for t in range(n_km):
                train_ckm += wei[t, 0] * train_km_list[t]
                test_ckm = test_ckm + wei[t, 0] * test_km_list[t]

            AP = OVKR_train_CV(train_ckm, train_y, tags[train])
            pred_label = OVKR_test(test_ckm, AP)
            pred[test, :] = pred_label

        # NOTE(review): saved once per noise level, after every fold has
        # filled its rows of ``pred`` -- confirm against the original layout.
        pred_real_f = "../ovkr_result/noisy_pred/%s_cvpred_%s_real_noise_%d.npy" % (data, mkl, nid)
        np.save(pred_real_f, pred)
return K if __name__ == "__main__": classes = generate_spike_classes(1, 2) train = generate_spike_times(classes) test = generate_spike_times(classes) rasterPlot(train) K = compute_K_matrix(train) ############################### # N = K.shape[0] # H = np.eye(N) - np.tile(1./N, [N, N]); # Kc = np.dot(np.dot(H, K), H) kcenterer = KernelCenterer() # kcenterer.fit(K) # Center Kernel Matrix Kc = kcenterer.transform(K) # ############################### D, E = eig(Kc) proj = np.dot(Kc, E[:, 0:2]) ################################ Center test Kt = compute_K_matrix(train, test) # M = Kt.shape[0] # A = np.tile(K.sum(axis=0), [M, 1]) / N # B = np.tile(Kt.sum(axis=1),[N, 1]) /N # Kc2 = Kt - A - B + K.sum()/ N**2; Kc2 = kcenterer.transform(Kt) proj2 = np.dot(Kc2, E[:, 0:2]) # kpca = KernelPCA(kernel="precomputed", n_components=2)
return K if __name__ == '__main__': classes = generate_spike_classes(1, 2) train = generate_spike_times(classes) test = generate_spike_times(classes) rasterPlot(train) K = compute_K_matrix(train) ############################### #N = K.shape[0] #H = np.eye(N) - np.tile(1./N, [N, N]); #Kc = np.dot(np.dot(H, K), H) kcenterer = KernelCenterer() # kcenterer.fit(K) # Center Kernel Matrix Kc = kcenterer.transform(K) # ############################### D, E = eig(Kc) proj = np.dot(Kc, E[:, 0:2]) ################################ Center test Kt = compute_K_matrix(train, test) #M = Kt.shape[0] #A = np.tile(K.sum(axis=0), [M, 1]) / N #B = np.tile(Kt.sum(axis=1),[N, 1]) /N #Kc2 = Kt - A - B + K.sum()/ N**2; Kc2 = kcenterer.transform(Kt) proj2 = np.dot(Kc2, E[:, 0:2]) #kpca = KernelPCA(kernel="precomputed", n_components=2)
Xtest = pls2.transform(Xtest) # Kernel PLS if (FE_kPLS == 1): d = pair.pairwise_distances(Xtrain,Xtrain) aux = np.triu(d) sigma = np.sqrt(np.mean(np.power(aux[aux!=0],2)*0.5)) gamma = 1/(2*sigma**2) ktrain = pair.rbf_kernel(Xtrain,Xtrain,gamma) ktest = pair.rbf_kernel(Xtest,Xtrain,gamma) kcent = KernelCenterer() kcent.fit(ktrain) ktrain = kcent.transform(ktrain) ktest = kcent.transform(ktest) kpls = PLSRegression(n_components = n_comp) kpls.fit(ktrain,Ytrain_m) Xtrain = kpls.transform(ktrain) Xtest = kpls.transform(ktest) # Linear CCA Cannonical Correlation Análisis if (FE_CCA == 1): from sklearn.cross_decomposition import CCA cca = CCA(n_components = n_class) cca.fit(Xtrain,Ytrain_m)
def ovkr_mkl(kernel_list, labels, mkl, n_folds, dataset, data):
    """Return cross-validated OVKR predictions using combined kernels.

    For every fold ``i`` in ``1..n_folds`` each kernel is split into
    train/train and test/train parts, centered with a ``KernelCenterer``
    fitted on the training part, and combined with the first column of the
    weight matrix.  ``OVKR_train_CV`` is run on the combined training kernel
    and ``OVKR_test`` fills the predictions for the held-out samples.

    Parameters
    ----------
    kernel_list : sequence of 2-D arrays
        Kernel matrices, indexed by sample on both axes.
    labels : 2-D array
        Label matrix, one row per sample and one column per label.
    mkl : str
        ``'UNIMKL'`` obtains weights from ``UNIMKL(n_km, n_labels)``; any
        other value loads them from
        ``../ovkr_result/weights/<dataset>_fold_<i>_<mkl>.weights``.
    n_folds : int
        Number of folds; fold ids run from 1 to ``n_folds`` inclusive.
    dataset : str
        Prefix of the weight file names.
    data : str
        Name of the tag file ``../data/cv/<data>.cv``.

    Returns
    -------
    ndarray
        Array with the shape of ``labels``; rows of each fold hold the
        output of ``OVKR_test`` for that fold.
    """
    n_sample, n_labels = labels.shape
    n_km = len(kernel_list)
    tags = np.loadtxt("../data/cv/" + data + ".cv")
    #tags = np.array(range(n_sample)) % n_folds + 1
    #np.random.seed(1234)
    #np.random.shuffle(tags)

    pred = np.zeros((n_sample, n_labels))

    # Run for each fold
    for i in range(1, n_folds + 1):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Test fold %d" % i)
        res_f = "../ovkr_result/weights/" + dataset + "_fold_%d_%s.weights" % (i, mkl)

        # divide data
        test = np.array(tags == i)
        train = np.array(~test)
        train_y = labels[train, :]
        n_train = len(train_y)

        train_km_list = []
        test_km_list = []
        for km in kernel_list:
            # The centerer is fitted on training rows only and then applied
            # to both the train and the test/train kernels.
            centerer = KernelCenterer()
            train_km = km[np.ix_(train, train)]
            test_km = km[np.ix_(test, train)]
            centerer.fit(train_km)
            train_km_list.append(centerer.transform(train_km))
            test_km_list.append(centerer.transform(test_km))

        if mkl == 'UNIMKL':
            wei = UNIMKL(n_km, n_labels)
        else:
            wei = np.loadtxt(res_f, ndmin=2)

        # The norm is taken over the whole matrix, but only column 0 is
        # rescaled and used below.
        normw = np.linalg.norm(wei)
        if normw == 0:
            # All-zero weights: fall back to unit-norm uniform weights.
            wei[:, 0] = np.ones(n_km) / np.linalg.norm(np.ones(n_km))
        else:
            wei[:, 0] = wei[:, 0] / normw

        # combine train and test kernel using learned weights
        train_ckm = np.zeros((n_train, n_train))
        test_ckm = np.zeros(test_km_list[0].shape)
        for t in range(n_km):
            train_ckm += wei[t, 0] * train_km_list[t]
            test_ckm = test_ckm + wei[t, 0] * test_km_list[t]

        AP = OVKR_train_CV(train_ckm, train_y, tags[train])
        pred_label = OVKR_test(test_ckm, AP)
        pred[test, :] = pred_label

    return pred
def svm_mkl(kernel_list, labels, mkl, n_folds, dataset, data):
    """Return cross-validated per-label SVM predictions using combined kernels.

    For every iteration ``i`` in ``1..n_folds`` each kernel is split into
    train/train and test/train parts and centered with a ``KernelCenterer``
    fitted on the training part.  The weight matrix is normalised column by
    column, and for every label the kernels are combined with that label's
    weights (or with the single shared column) before calling ``svm``.

    Parameters
    ----------
    kernel_list : sequence of 2-D arrays
        Kernel matrices, indexed by sample on both axes.
    labels : 2-D array
        Label matrix, one row per sample and one column per label.
    mkl : str
        ``'UNIMKL'`` obtains weights from ``UNIMKL(n_km, n_labels)``; any
        other value loads them from
        ``../svm_result/weights/<dataset>_fold_<i>_<mkl>.weights``.
    n_folds : int
        Number of iterations.
    dataset : str
        Prefix of the weight file names.
    data : str
        Name of the tag file ``../data/cv/<data>.cv``.

    Returns
    -------
    ndarray
        Array with the shape of ``labels`` holding the output of ``svm`` for
        each held-out fold and label.
    """
    n_sample, n_labels = labels.shape
    n_km = len(kernel_list)
    tags = np.loadtxt("../data/cv/" + data + ".cv")

    pred = np.zeros((n_sample, n_labels))

    # Run for each fold
    for i in range(1, n_folds + 1):
        # Single-argument print(...) emits the same text on Python 2 and 3.
        print("Test fold %d" % i)
        res_f = "../svm_result/weights/" + dataset + "_fold_%d_%s.weights" % (i, mkl)

        # divide data
        # NOTE(review): the held-out fold is i+1 (wrapping to 1), while the
        # weights are read from the file of fold i, and the wrap point 6 is
        # hard-coded for five folds -- confirm both are intended.
        test = np.array(tags == (i + 1 if i + 1 < 6 else 1))
        train = np.array(~test)
        train_y = labels[train, :]
        test_y = labels[test, :]
        n_train = len(train_y)

        train_km_list = []
        test_km_list = []
        for km in kernel_list:
            # The centerer is fitted on training rows only and then applied
            # to both the train and the test/train kernels.
            centerer = KernelCenterer()
            train_km = km[np.ix_(train, train)]
            test_km = km[np.ix_(test, train)]
            centerer.fit(train_km)
            train_km_list.append(centerer.transform(train_km))
            test_km_list.append(centerer.transform(test_km))

        if mkl == 'UNIMKL':
            wei = UNIMKL(n_km, n_labels)
        else:
            wei = np.loadtxt(res_f, ndmin=2)

        # Normalized weights
        normw = np.linalg.norm(wei, 2, 0)
        uni = np.ones(n_km) / np.linalg.norm(np.ones(n_km))
        # Walk the columns that exist: a one-column (shared) weight matrix is
        # accepted below, and indexing normw by label would overrun it.
        for t in range(wei.shape[1]):
            if normw[t] == 0:
                # collapsed solution
                wei[:, t] = uni
            else:
                wei[:, t] = wei[:, t] / normw[t]

        for j in range(n_labels):
            tr_y = train_y[:, j]
            te_y = test_y[:, j]

            # A single column means one weight vector shared by all labels.
            if wei.shape[1] == 1:
                wj = wei[:, 0]
            else:
                wj = wei[:, j]

            # combine train and test kernel using learned weights
            train_ckm = np.zeros((n_train, n_train))
            test_ckm = np.zeros(test_km_list[0].shape)
            for t in range(n_km):
                train_ckm = train_ckm + wj[t] * train_km_list[t]
                test_ckm = test_ckm + wj[t] * test_km_list[t]

            pred_label = svm(train_ckm, test_ckm, tr_y, te_y, tags[train], i)
            pred[test, j] = pred_label

    return pred
def cls(mkl):
    """Run SVM classification with combined kernels on every dataset.

    Iterates over the module-level ``datasets``.  Depending on the dataset
    name, kernels are loaded from text files, loaded from ``.mat`` files, or
    computed as RBF kernels from ``x.txt``.  Multi-label datasets are handed to
    ``svm_mkl`` with five folds; the 'plant'/'psortPos'/'psortNeg' datasets
    are handled inline with a per-fold ``GridSearchCV`` over ``C``.
    Predictions are written to ``../svm_result/pred/<data>_cvpred_<mkl>.txt``.

    Parameters
    ----------
    mkl : str
        Name of the kernel-weighting scheme.  ``'UNIMKL'`` obtains weights
        from ``UNIMKL``; other values are read from saved weight files.

    Returns
    -------
    None
    """
    for data in datasets:
        print "####################"
        print '# ',data
        print "####################"
        # consider labels with more than 2%
        # (columns of y whose fraction of entries equal to 1 exceeds t)
        t = 0.02
        datadir = '../data/'
        km_dir = datadir + data + "/"
        if data == 'Fingerprint':
            # Precomputed kernels, one text file per kernel name.
            kernels = ['PPKr', 'NB','CP2','NI','LB','CPC','RLB','LC','LI','CPK','RLI','CSC']
            km_list = []
            y = np.loadtxt(km_dir+"y.txt",ndmin=2)
            p = np.sum(y==1,0)/float(y.shape[0])
            y = y[:,p>t]
            for k in kernels:
                km_f = datadir + data + ("/%s.txt" % k)
                km_list.append(normalize_km(np.loadtxt(km_f)))
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            pred = svm_mkl(km_list, y, mkl, 5, data,data)
            np.savetxt(pred_f, pred, fmt="%d")
        elif data == 'plant' or data == 'psortPos' or data == 'psortNeg':
            # Single label vector and kernels stored as .mat files.
            y = loadmat(km_dir+"label_%s.mat" % data)['y'].ravel()
            km_list = []
            # Kernel files are listed through the shell; the order of the
            # `ls` output fixes the order of km_list and therefore which
            # weight is applied to which kernel further down.
            fs = commands.getoutput('ls %skern\;substr*.mat' % km_dir).split("\n")
            for f in fs:
                km = loadmat(f)
                km_list.append(km['K'])
            fs = commands.getoutput('ls %skern\;phylpro*.mat' % km_dir).split("\n")
            for f in fs:
                km = loadmat(f)
                km_list.append(km['K'])
            fs = commands.getoutput('ls %skm_evalue*.mat' % km_dir).split("\n")
            for f in fs:
                km = loadmat(f)
                km_list.append(km['K'])
            n_samples = y.shape[0]
            n_km = len(km_list)
            y_pred = np.zeros(n_samples)
            n_labels = 1
            tags = np.loadtxt("../data/cv/"+data+".cv")
            # Fold ids 1..5 are hard-coded here.
            for fold in range(1,6):
                test_ind = np.where(tags == fold)[0]
                train_ind = np.where(tags != fold)[0]
                train_km_list = []
                test_km_list = []
                train_y = y[train_ind]
                test_y = y[test_ind]
                n_train = len(train_ind)
                n_test = len(test_ind)
                w_f = "../svm_result/weights/"+data+"_fold_%d_%s.weights" % (fold,mkl)
                if mkl == 'UNIMKL':
                    w = UNIMKL(n_km, n_labels).ravel()
                else:
                    w = np.loadtxt(w_f, ndmin=2).ravel()
                # Scale the weights to unit 2-norm; an all-zero vector is
                # replaced by unit-norm uniform weights.
                normw = np.linalg.norm(w, 2, 0)
                uni = np.ones(n_km) / np.linalg.norm(np.ones(n_km))
                if normw == 0:
                    w = uni
                else:
                    w = w / normw
                for km in km_list:
                    kc = KernelCenterer()
                    train_km = km[np.ix_(train_ind, train_ind)]
                    test_km = km[np.ix_(test_ind, train_ind)]
                    # center train and test kernels
                    # (the centerer is fitted on the training kernel only)
                    kc.fit(train_km)
                    train_km_c = kc.transform(train_km)
                    test_km_c = kc.transform(test_km)
                    train_km_list.append(train_km_c)
                    test_km_list.append(test_km_c)
                # Weighted sum of the centered kernels.  The loop variable
                # reuses the name `t` of the label threshold; `t` is reset at
                # the top of the next dataset iteration.
                train_ckm = np.zeros((n_train,n_train))
                for t in range(n_km):
                    train_ckm = train_ckm + w[t]*train_km_list[t]
                test_ckm = np.zeros(test_km_list[0].shape)
                for t in range(n_km):
                    test_ckm = test_ckm + w[t]*test_km_list[t]
                # Pick C by grid search on stratified shuffle splits of the
                # training fold, then refit and predict the held-out fold.
                C_range = [0.01,0.1,1,10,100]
                param_grid = dict(C=C_range)
                cv = StratifiedShuffleSplit(train_y,n_iter=5,test_size=0.2,random_state=42)
                grid = GridSearchCV(SVC(kernel='precomputed'), param_grid=param_grid, cv=cv)
                grid.fit(train_ckm, train_y)
                bestC = grid.best_params_['C']
                # `svm` is a local name inside this function from here on.
                svm = SVC(kernel='precomputed', C=bestC)
                svm.fit(train_ckm, train_y)
                y_pred[test_ind] = svm.predict(test_ckm)
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            np.savetxt(pred_f, y_pred, fmt="%d")
        elif data in image_datasets:
            # Fifteen precomputed linear kernels, kernel_linear_1..15.txt.
            y = np.loadtxt(km_dir+"y.txt",ndmin=2)
            p = np.sum(y==1,0)/float(y.shape[0])
            y = y[:,p>t]
            linear_km_list = []
            for i in range(1,16):
                name = 'kernel_linear_%d.txt' % i
                km_f = km_dir+name
                km = np.loadtxt(km_f)
                # normalize input kernel !!!!!!!!
                linear_km_list.append(normalize_km(km))
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            pred = svm_mkl(linear_km_list, y, mkl, 5, data,data)
            np.savetxt(pred_f, pred, fmt="%d")
        elif data == 'SPAMBASE':
            # RBF kernels over standardized, row-normalized features; the
            # label columns are not filtered by frequency in this branch.
            y = np.loadtxt(km_dir+"y.txt",ndmin=2)
            rbf_km_list = []
            gammas = [2**-9, 2**-8, 2**-7, 2**-6, 2**-5, 2**-4, 2**-3]
            X = np.loadtxt(km_dir+"x.txt")
            scaler = preprocessing.StandardScaler().fit(X)
            X = scaler.transform(X)
            X = preprocessing.normalize(X)
            for gamma in gammas:
                km = rbf_kernel(X, gamma=gamma)
                rbf_km_list.append(km)
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            pred = svm_mkl(rbf_km_list, y, mkl, 5, data,data)
            np.savetxt(pred_f, pred, fmt="%d")
        else:
            # Every other dataset: RBF kernels on a wider gamma grid, with
            # label columns filtered by the frequency threshold t.
            rbf_km_list = []
            gammas = [2**-13,2**-11,2**-9,2**-7,2**-5,2**-3,2**-1,2**1,2**3]
            X = np.loadtxt(km_dir+"x.txt")
            scaler = preprocessing.StandardScaler().fit(X)
            X = scaler.transform(X)
            X = preprocessing.normalize(X)
            y = np.loadtxt(km_dir+"y.txt")
            p = np.sum(y==1,0)/float(y.shape[0])
            y = y[:,p>t]
            for gamma in gammas:
                km = rbf_kernel(X, gamma=gamma)
                # normalize input kernel !!!!!!!!
                # (the kernel is appended as computed; normalize_km is not
                # called in this branch)
                rbf_km_list.append(km)
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            pred = svm_mkl(rbf_km_list, y, mkl, 5, data,data)
            np.savetxt(pred_f, pred, fmt="%d")
plt.plot(nComponents,kpcaldaScores,lw=3) plt.xlim(1,np.amax(nComponents)) plt.title('kPCA accuracy') plt.xlabel('Number of components') plt.ylabel('accuracy') plt.xlim([500,1500]) plt.legend (['LDA'],loc='lower right') plt.grid(True) if(0): # K-PCA second round ktrain = pair.rbf_kernel(Xtrain,Xtrain,gamma) ktest = pair.rbf_kernel(Xtest,Xtrain,gamma) kcent = KernelCenterer() kcent.fit(ktrain) ktrain = kcent.transform(ktrain) ktest = kcent.transform(ktest) kpca = PCA() kpca.fit_transform(ktrain) cumvarkPCA2 = np.cumsum(kpca.explained_variance_ratio_[0:220]) # Calculate classifiation scores for each component nComponents = np.arange(1,nFeatures) kpcaScores2 = np.zeros((5,np.alen(nComponents))) for i,n in enumerate(nComponents): kpca2 = PCA(n_components=n) kpca2.fit(ktrain) XtrainT = kpca2.transform(ktrain) XtestT = kpca2.transform(ktest)