def test_kernelcenterer_vs_sklearn():
    """Check msmbuilder's KernelCenterer against the sklearn one.

    ``KernelCentererR`` is presumably the sklearn reference class (confirm
    against the module imports).  It is fitted on all trajectories stacked
    into a single array, while ``KernelCenterer`` is fitted on the list of
    trajectories; the transforms of the first trajectory must agree.
    """
    # Reference result: one stacked array in, one trajectory transformed.
    reference = KernelCentererR()
    reference.fit(np.concatenate(trajs))
    expected = reference.transform(trajs[0])

    # Result under test: list of trajectories in, list out.
    candidate = KernelCenterer()
    candidate.fit(trajs)
    actual = candidate.transform(trajs)[0]

    np.testing.assert_array_almost_equal(expected, actual)
Esempio n. 2
0
def test_kernelcenterer_vs_sklearn():
    """The list-based KernelCenterer must reproduce the reference output.

    The reference (``KernelCentererR`` -- presumably sklearn's class, TODO
    confirm) sees the concatenated trajectories; the class under test sees
    the trajectory list.  Only the first trajectory is compared.
    """
    stacked = np.concatenate(trajs)

    ref_model = KernelCentererR()
    ref_model.fit(stacked)

    model = KernelCenterer()
    model.fit(trajs)

    ref_first = ref_model.transform(trajs[0])
    all_transformed = model.transform(trajs)

    np.testing.assert_array_almost_equal(ref_first, all_transformed[0])
Esempio n. 3
0
def cv_mkl(kernel_list, labels, mkl, n_folds, dataset, data):
    """Learn kernel-combination weights for every CV fold and save them.

    For each fold ``i`` in ``1..n_folds`` the training part of every kernel
    is centered with ``KernelCenterer`` and passed to the MKL routine
    selected by ``mkl``.  The resulting weights are written with
    ``np.savetxt`` to ``../svm_result/weights/<dataset>_fold_<i>_<mkl>.weights``.

    Parameters
    ----------
    kernel_list : sequence of 2-D arrays
        Kernel matrices; rows and columns are selected with the fold mask.
    labels : 2-D array
        Label matrix; its shape is unpacked as (n_samples, n_labels).
    mkl : str
        ``'UNIMKL'``, ``'ALIGNF2'``, ``'TSMKL'`` or any name containing
        ``'ALIGNF2SOFT'``.  Any other value produces no output.
    n_folds : int
        Number of folds; fold ids in the tag file are expected to be 1-based.
    dataset : str
        Prefix used in the output file names.
    data : str
        Name of the fold-assignment file ``../data/cv/<data>.cv``; also
        forwarded to ``alignf2`` / ``ALIGNF2SOFT``.

    Returns
    -------
    None
        Results are only written to disk.
    """
    # Unpacking (rather than indexing) keeps the 2-D requirement on labels.
    _, n_labels = labels.shape
    n_km = len(kernel_list)
    tags = np.loadtxt("../data/cv/"+data+".cv")

    for i in range(1, n_folds + 1):
        # Single pre-formatted argument: prints identically on Python 2 and 3.
        print("Test fold %d" % i)
        res_f = "../svm_result/weights/"+dataset+"_fold_%d_%s.weights" % (i,mkl)
        para_f = "../svm_result/upperbound/"+dataset+"_fold_%d_%s.ubound" % (i,mkl)
        test = np.array(tags == i)
        train = np.array(~test)
        train_y = labels[train, :]

        # Center every training kernel.  No normalisation happens here; it
        # has to be done by the caller if required.
        train_km_list = []
        for km in kernel_list:
            kc = KernelCenterer()
            train_km = km[np.ix_(train, train)]
            kc.fit(train_km)
            train_km_list.append(kc.transform(train_km))

        # The four selectors are mutually exclusive, so an elif chain is
        # equivalent to the independent tests it replaces.
        if mkl == 'UNIMKL':
            res = UNIMKL(train_km_list, train_y)
            np.savetxt(res_f, res)
        elif mkl == 'ALIGNF2':
            res = alignf2(train_km_list, train_y, data)
            np.savetxt(res_f, res)
        elif 'ALIGNF2SOFT' in mkl:
            bestC, res = ALIGNF2SOFT(train_km_list, train_y, i, tags, data)
            np.savetxt(res_f, res)
            np.savetxt(para_f, bestC)
        elif mkl == "TSMKL":
            # One weight vector per label, stored column-wise.
            W = np.zeros((n_km, n_labels))
            for j in range(n_labels):
                print("..label %s" % j)
                W[:, j] = TSMKL(train_km_list, train_y[:, j])
            np.savetxt(res_f, W)
Esempio n. 4
0
class kc():
    """Replace selected columns of a table by centered-kernel features.

    A kernel matrix is computed from ``data[cols]`` with
    ``pairwise_kernels(metric=metric)`` and centered with ``KernelCenterer``.
    Column ``cols[i]`` of the input is then overwritten with column ``i`` of
    the centered kernel matrix.  The input object is modified in place and
    also returned.

    Parameters
    ----------
    cols : sequence
        Column keys of ``data`` to use as features and to overwrite.
    metric : str or callable
        Passed through to ``pairwise_kernels``.
    """

    def __init__(self, cols, metric):
        self.columns = cols
        self.metric = metric
        self.model = KernelCenterer()
        # Features seen by the last fit; needed to build K(new, train).
        self._fit_features = None

    def _remember(self, data):
        """Store and return a private copy of the fitting features."""
        import copy

        # Copy because fit_transform overwrites these columns in ``data``
        # afterwards and the selection may be a view of it.
        self._fit_features = copy.deepcopy(data[self.columns])
        return self._fit_features

    def _write_back(self, data, transformed):
        """Overwrite each selected column with the matching kernel column."""
        for idx, column in enumerate(self.columns):
            data[column] = transformed[:, idx]
        return data

    def fit(self, data):
        """Fit the centerer on the kernel of ``data[cols]``; returns self."""
        features = self._remember(data)
        k = pairwise_kernels(features, metric=self.metric)
        self.model.fit(k)
        return self

    def fit_transform(self, data):
        """Fit on ``data`` and overwrite its columns with centered features."""
        features = self._remember(data)
        k = pairwise_kernels(features, metric=self.metric)
        return self._write_back(data, self.model.fit_transform(k))

    def transform(self, data):
        """Overwrite the columns of ``data`` using the already fitted centerer.

        The kernel is evaluated between the new rows and the rows used for
        fitting, which is the (n_new, n_fit) shape the fitted centerer
        expects.  Computing the kernel of the new rows with themselves only
        has the right shape when the row counts happen to match, and even
        then yields features unrelated to those from ``fit_transform``.
        Before any fit, ``_fit_features`` is None and the centerer itself
        reports the unfitted state.
        """
        k = pairwise_kernels(data[self.columns], self._fit_features,
                             metric=self.metric)
        return self._write_back(data, self.model.transform(k))
def ovkr_mkl(kernel_list, labels, mkl, n_folds, dataset, data):
    """Run cross-validated OVKR on labels perturbed at several noise levels.

    For every noise level the labels are perturbed with ``addNoise``, each
    fold's kernels are centered on the training part and combined with the
    first column of the weight matrix, and an OVKR model is trained and
    applied to the held-out fold.  The predictions of each noise level are
    saved to
    ``../ovkr_result/noisy_pred/<data>_cvpred_<mkl>_real_noise_<nid>.npy``.

    Parameters
    ----------
    kernel_list : sequence of 2-D arrays
        Kernel matrices; rows and columns are selected with the fold masks.
    labels : 2-D array
        Label matrix; its shape is unpacked as (n_samples, n_labels).
    mkl : str
        ``'UNIMKL'`` computes weights via ``UNIMKL(n_km, n_labels)``; any
        other value loads them from the per-fold noisy weight file.
    n_folds : int
        Number of folds; fold ids in the tag file are expected to be 1-based.
    dataset : str
        Prefix of the weight file names.
    data : str
        Name of the fold file ``../data/cv/<data>.cv`` and prefix of the
        prediction file names.

    Returns
    -------
    None
        Predictions are only written to disk.
    """
    n_sample, n_labels = labels.shape
    n_km = len(kernel_list)
    tags = np.loadtxt("../data/cv/"+data+".cv")

    # Noise levels applied to the output labels.
    noise_level = [0.005, 0.010, 0.015, 0.020, 0.025]

    for nid, noi in enumerate(noise_level):
        # Pre-formatted so the line prints identically on Python 2 and 3.
        print("noise %s %s" % (noi, nid))
        Y = addNoise(labels, noi)

        pred = np.zeros((n_sample, n_labels))

        for i in range(1, n_folds + 1):
            print("Test fold %d" % i)
            res_f = "../ovkr_result/noisy_weights/"+dataset+"_fold_%d_%s_noise_%d.weights" % (i,mkl, nid)

            # Split samples into the held-out fold and the rest.
            test = np.array(tags == i)
            train = np.array(~test)
            train_y = Y[train, :]
            n_train = len(train_y)

            # Center on the training kernel; apply the same centering to the
            # (test x train) kernel.
            train_km_list = []
            test_km_list = []
            for km in kernel_list:
                kc = KernelCenterer()
                train_km = km[np.ix_(train, train)]
                test_km = km[np.ix_(test, train)]
                kc.fit(train_km)
                train_km_list.append(kc.transform(train_km))
                test_km_list.append(kc.transform(test_km))

            if mkl == 'UNIMKL':
                wei = UNIMKL(n_km, n_labels)
            else:
                wei = np.loadtxt(res_f, ndmin=2)

            # Only column 0 is used; it is scaled by the norm of the whole
            # weight matrix, or replaced by uniform unit-norm weights when
            # the learned weights are all zero.
            normw = np.linalg.norm(wei)
            if normw == 0:
                wei[:, 0] = np.ones(n_km) / np.linalg.norm(np.ones(n_km))
            else:
                wei[:, 0] = wei[:, 0] / normw

            # Weighted sum of the centered kernels.
            train_ckm = np.zeros((n_train, n_train))
            test_ckm = np.zeros(test_km_list[0].shape)
            for t in range(n_km):
                train_ckm += wei[t, 0] * train_km_list[t]
                test_ckm = test_ckm + wei[t, 0] * test_km_list[t]

            AP = OVKR_train_CV(train_ckm, train_y, tags[train])
            pred[test, :] = OVKR_test(test_ckm, AP)

        pred_real_f = "../ovkr_result/noisy_pred/%s_cvpred_%s_real_noise_%d.npy" % (data, mkl, nid)
        np.save(pred_real_f, pred)


if __name__ == "__main__":
    # Demo script: centers a precomputed kernel on generated spike trains
    # and projects training and test kernels onto two eigenvectors.

    classes = generate_spike_classes(1, 2)
    train = generate_spike_times(classes)
    test = generate_spike_times(classes)
    rasterPlot(train)
    K = compute_K_matrix(train)

    # Center the training kernel.  Earlier hand-written version, kept for
    # reference:
    #   N = K.shape[0]
    #   H = np.eye(N) - np.tile(1./N, [N, N])
    #   Kc = np.dot(np.dot(H, K), H)
    kcenterer = KernelCenterer()
    kcenterer.fit(K)
    Kc = kcenterer.transform(K)

    # NOTE(review): if `eig` is the general numpy/scipy solver, eigenvalues
    # are not returned in sorted order, so columns 0:2 are not guaranteed to
    # be the two leading components -- confirm (consider eigh plus a sort if
    # Kc is symmetric).
    D, E = eig(Kc)
    proj = np.dot(Kc, E[:, 0:2])

    # Center the test kernel with the statistics of the training kernel.
    # Assumes compute_K_matrix(train, test) is laid out as the fitted
    # centerer expects (test rows x train columns) -- TODO confirm.
    # Earlier hand-written version, kept for reference:
    #   M = Kt.shape[0]
    #   A = np.tile(K.sum(axis=0), [M, 1]) / N
    #   B = np.tile(Kt.sum(axis=1),[N, 1]) /N
    #   Kc2 = Kt - A - B + K.sum()/ N**2
    Kt = compute_K_matrix(train, test)
    Kc2 = kcenterer.transform(Kt)
    proj2 = np.dot(Kc2, E[:, 0:2])

    # kpca = KernelPCA(kernel="precomputed", n_components=2)


if __name__ == '__main__':
    # Demo script: centers a precomputed kernel on generated spike trains
    # and projects training and test kernels onto two eigenvectors.

    classes = generate_spike_classes(1, 2)
    train = generate_spike_times(classes)
    test = generate_spike_times(classes)
    rasterPlot(train)
    K = compute_K_matrix(train)
    ###############################
    # Earlier hand-written centering, kept for reference:
    #N = K.shape[0]
    #H = np.eye(N) - np.tile(1./N, [N, N]);
    #Kc = np.dot(np.dot(H, K), H)
    kcenterer = KernelCenterer()  # centerer fitted on the training kernel
    kcenterer.fit(K)  # Center Kernel Matrix
    Kc = kcenterer.transform(K)  # centered training kernel
    ###############################
    # NOTE(review): if `eig` is the general numpy/scipy solver, eigenvalues
    # are not sorted, so columns 0:2 need not be the leading components --
    # confirm.
    D, E = eig(Kc)
    proj = np.dot(Kc, E[:, 0:2])

    ################################ Center test
    # The test kernel is centered with the already fitted centerer.
    Kt = compute_K_matrix(train, test)
    # Earlier hand-written centering of the test kernel, kept for reference:
    #M = Kt.shape[0]
    #A = np.tile(K.sum(axis=0), [M, 1]) / N
    #B = np.tile(Kt.sum(axis=1),[N, 1]) /N
    #Kc2 = Kt - A - B + K.sum()/ N**2;
    Kc2 = kcenterer.transform(Kt)
    proj2 = np.dot(Kc2, E[:, 0:2])

    #kpca = KernelPCA(kernel="precomputed", n_components=2)
Esempio n. 8
0
    Xtest = pls2.transform(Xtest)
    

    
# Kernel PLS: PLS regression fitted on a centered RBF kernel instead of the
# raw features.  Runs only when the FE_kPLS flag equals 1.

if (FE_kPLS == 1):
    # RBF width heuristic from the training set: take the non-zero entries
    # of the upper triangle of the pairwise distance matrix and use
    # sigma = sqrt(mean(d**2 / 2)).
    d = pair.pairwise_distances(Xtrain,Xtrain)
    aux = np.triu(d)
    sigma = np.sqrt(np.mean(np.power(aux[aux!=0],2)*0.5))
    gamma = 1/(2*sigma**2)
    
    # Train kernel is (train x train); test kernel is (test x train).
    ktrain = pair.rbf_kernel(Xtrain,Xtrain,gamma)
    ktest = pair.rbf_kernel(Xtest,Xtrain,gamma)
    # Center both kernels with statistics fitted on the training kernel only.
    kcent = KernelCenterer()
    kcent.fit(ktrain)
    ktrain = kcent.transform(ktrain)
    ktest = kcent.transform(ktest)
    
    kpls = PLSRegression(n_components = n_comp)
    kpls.fit(ktrain,Ytrain_m)
    
    # Xtrain / Xtest are overwritten with the PLS transform of the kernels.
    Xtrain = kpls.transform(ktrain)
    Xtest = kpls.transform(ktest)
    
# Linear CCA (Canonical Correlation Analysis).  Runs only when the FE_CCA
# flag equals 1.

if (FE_CCA == 1):
    from sklearn.cross_decomposition import CCA
    # One canonical component per class (n_class is defined elsewhere).
    cca = CCA(n_components = n_class)
    cca.fit(Xtrain,Ytrain_m)
Esempio n. 9
0
def ovkr_mkl(kernel_list, labels, mkl, n_folds, dataset, data):
    """Cross-validated OVKR predictions with a weighted kernel combination.

    For every fold the kernels are centered on the training part, combined
    with the first column of the weight matrix, and an OVKR model trained on
    the combined training kernel predicts the held-out samples.

    Parameters
    ----------
    kernel_list : sequence of 2-D arrays
        Kernel matrices; rows and columns are selected with the fold masks.
    labels : 2-D array
        Label matrix; its shape is unpacked as (n_samples, n_labels).
    mkl : str
        ``'UNIMKL'`` computes weights via ``UNIMKL(n_km, n_labels)``; any
        other value loads them from
        ``../ovkr_result/weights/<dataset>_fold_<i>_<mkl>.weights``.
    n_folds : int
        Number of folds; fold ids in the tag file are expected to be 1-based.
    dataset : str
        Prefix of the weight file names.
    data : str
        Name of the fold-assignment file ``../data/cv/<data>.cv``.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_samples, n_labels) holding, for every sample, the
        prediction made while its fold was held out.
    """
    n_sample, n_labels = labels.shape
    n_km = len(kernel_list)
    tags = np.loadtxt("../data/cv/"+data+".cv")

    pred = np.zeros((n_sample, n_labels))

    for i in range(1, n_folds + 1):
        # Single pre-formatted argument: prints identically on Python 2 and 3.
        print("Test fold %d" % i)
        res_f = "../ovkr_result/weights/"+dataset+"_fold_%d_%s.weights" % (i,mkl)

        # Split samples into the held-out fold and the rest.
        test = np.array(tags == i)
        train = np.array(~test)
        train_y = labels[train, :]
        n_train = len(train_y)

        # Center on the training kernel; apply the same centering to the
        # (test x train) kernel.
        train_km_list = []
        test_km_list = []
        for km in kernel_list:
            kc = KernelCenterer()
            train_km = km[np.ix_(train, train)]
            test_km = km[np.ix_(test, train)]
            kc.fit(train_km)
            train_km_list.append(kc.transform(train_km))
            test_km_list.append(kc.transform(test_km))

        if mkl == 'UNIMKL':
            wei = UNIMKL(n_km, n_labels)
        else:
            wei = np.loadtxt(res_f, ndmin=2)

        # Only column 0 is used; it is scaled by the norm of the whole weight
        # matrix, or replaced by uniform unit-norm weights when the learned
        # weights are all zero.
        normw = np.linalg.norm(wei)
        if normw == 0:
            wei[:, 0] = np.ones(n_km) / np.linalg.norm(np.ones(n_km))
        else:
            wei[:, 0] = wei[:, 0] / normw

        # Weighted sum of the centered kernels.
        train_ckm = np.zeros((n_train, n_train))
        test_ckm = np.zeros(test_km_list[0].shape)
        for t in range(n_km):
            train_ckm += wei[t, 0] * train_km_list[t]
            test_ckm = test_ckm + wei[t, 0] * test_km_list[t]

        AP = OVKR_train_CV(train_ckm, train_y, tags[train])
        pred[test, :] = OVKR_test(test_ckm, AP)
    return pred
Esempio n. 10
0
def svm_mkl(kernel_list, labels, mkl, n_folds, dataset, data):
    """Cross-validated per-label SVM predictions on combined kernels.

    For every fold the kernels are centered on the training part and, for
    each label, combined with that label's weight vector (or the single
    shared vector when the weight matrix has one column).  ``svm`` is then
    called on the combined train/test kernels.

    Parameters
    ----------
    kernel_list : sequence of 2-D arrays
        Kernel matrices; rows and columns are selected with the fold masks.
    labels : 2-D array
        Label matrix; its shape is unpacked as (n_samples, n_labels).
    mkl : str
        ``'UNIMKL'`` computes weights via ``UNIMKL(n_km, n_labels)``; any
        other value loads them from
        ``../svm_result/weights/<dataset>_fold_<i>_<mkl>.weights``.
    n_folds : int
        Number of folds; fold ids in the tag file are expected to be
        ``1..n_folds``.
    dataset : str
        Prefix of the weight file names.
    data : str
        Name of the fold-assignment file ``../data/cv/<data>.cv``.

    Returns
    -------
    numpy.ndarray
        Array of shape (n_samples, n_labels) with the predicted labels.
    """
    n_sample, n_labels = labels.shape
    n_km = len(kernel_list)
    tags = np.loadtxt("../data/cv/"+data+".cv")

    pred = np.zeros((n_sample, n_labels))
    for i in range(1, n_folds + 1):
        # Single pre-formatted argument: prints identically on Python 2 and 3.
        print("Test fold %d" % i)
        res_f = "../svm_result/weights/"+dataset+"_fold_%d_%s.weights" % (i,mkl)

        # The held-out fold is the one after ``i``, wrapping back to fold 1.
        # The wrap uses n_folds instead of a literal tied to five folds;
        # the result is identical for n_folds == 5.
        # NOTE(review): the weight file and svm() are keyed by fold i while
        # the held-out fold is i+1 -- confirm this offset is intentional.
        test_fold = i % n_folds + 1
        test = np.array(tags == test_fold)
        train = np.array(~test)
        train_y = labels[train, :]
        test_y = labels[test, :]
        n_train = len(train_y)

        # Center on the training kernel; apply the same centering to the
        # (test x train) kernel.
        train_km_list = []
        test_km_list = []
        for km in kernel_list:
            kc = KernelCenterer()
            train_km = km[np.ix_(train, train)]
            test_km = km[np.ix_(test, train)]
            kc.fit(train_km)
            train_km_list.append(kc.transform(train_km))
            test_km_list.append(kc.transform(test_km))

        if mkl == 'UNIMKL':
            wei = UNIMKL(n_km, n_labels)
        else:
            wei = np.loadtxt(res_f, ndmin=2)

        # Normalise every weight column to unit 2-norm.  Iterate over the
        # columns actually present: a weight file with a single shared
        # column is supported below, and indexing normw by label would run
        # past its end in that case.
        normw = np.linalg.norm(wei, 2, 0)
        uni = np.ones(n_km) / np.linalg.norm(np.ones(n_km))
        for t in range(wei.shape[1]):
            if normw[t] == 0:  # collapsed solution
                wei[:, t] = uni
            else:
                wei[:, t] = wei[:, t] / normw[t]

        shared_weights = wei.shape[1] == 1
        for j in range(n_labels):
            wj = wei[:, 0] if shared_weights else wei[:, j]

            # Weighted sum of the centered kernels for this label.
            train_ckm = np.zeros((n_train, n_train))
            test_ckm = np.zeros(test_km_list[0].shape)
            for t in range(n_km):
                train_ckm = train_ckm + wj[t] * train_km_list[t]
                test_ckm = test_ckm + wj[t] * test_km_list[t]

            pred_label = svm(train_ckm, test_ckm, train_y[:, j], test_y[:, j],
                             tags[train], i)
            pred[test, j] = pred_label
    return pred
Esempio n. 11
0
def cls(mkl):
    """Produce and save cross-validated SVM predictions for every dataset.

    Iterates over the module-level ``datasets``, builds the list of kernel
    matrices appropriate for each dataset, obtains 5-fold predictions and
    writes them to ``../svm_result/pred/<data>_cvpred_<mkl>.txt``.

    ``mkl`` is the name of the kernel-weighting method; it selects the
    weight files that are read and appears in the output file names.
    Nothing is returned.
    """

    for data in datasets:
        print "####################"
        print '# ',data
        print "####################" 
        # Keep only labels whose fraction of positive samples exceeds t (2%).
        t = 0.02
        datadir = '../data/'
        km_dir = datadir + data + "/"

        if data == 'Fingerprint':
            # Precomputed kernels, one text file per kernel name.
            kernels = ['PPKr', 'NB','CP2','NI','LB','CPC','RLB','LC','LI','CPK','RLI','CSC']
            km_list = []
            y = np.loadtxt(km_dir+"y.txt",ndmin=2)
            # p: per-label fraction of entries equal to 1.
            p = np.sum(y==1,0)/float(y.shape[0])        
            y = y[:,p>t]
            for k in kernels:
                km_f = datadir + data + ("/%s.txt" % k)
                km_list.append(normalize_km(np.loadtxt(km_f)))
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            pred = svm_mkl(km_list, y, mkl, 5, data,data)
            np.savetxt(pred_f, pred, fmt="%d")

        elif data == 'plant' or data == 'psortPos' or data == 'psortNeg':
            # Single-label datasets with kernels stored as .mat files; the
            # cross-validation is done inline here instead of via svm_mkl.
            y = loadmat(km_dir+"label_%s.mat" % data)['y'].ravel()
            km_list = []
            # File lists come from the shell's `ls`, so the kernel order is
            # whatever order `ls` prints.
            # NOTE(review): the `commands` module exists only on Python 2.
            fs = commands.getoutput('ls %skern\;substr*.mat' % km_dir).split("\n")
            for f in fs:
                km = loadmat(f)
                km_list.append(km['K'])
            fs = commands.getoutput('ls %skern\;phylpro*.mat' % km_dir).split("\n")
            for f in fs:
                km = loadmat(f)
                km_list.append(km['K'])
            fs = commands.getoutput('ls %skm_evalue*.mat' % km_dir).split("\n")
            for f in fs:
                km = loadmat(f)
                km_list.append(km['K'])

            n_samples = y.shape[0]
            n_km = len(km_list)
            y_pred = np.zeros(n_samples)
            n_labels = 1
            tags = np.loadtxt("../data/cv/"+data+".cv")
            for fold in range(1,6):
                test_ind = np.where(tags == fold)[0]
                train_ind = np.where(tags != fold)[0]
                train_km_list = []
                test_km_list = []
                train_y = y[train_ind]
                test_y = y[test_ind]
                n_train = len(train_ind)
                n_test = len(test_ind)

                w_f = "../svm_result/weights/"+data+"_fold_%d_%s.weights" % (fold,mkl)

                if mkl == 'UNIMKL':
                    w = UNIMKL(n_km, n_labels).ravel()
                else:
                    w = np.loadtxt(w_f, ndmin=2).ravel()

                # Scale the weights to unit 2-norm; fall back to uniform
                # unit-norm weights when they are all zero.
                normw = np.linalg.norm(w, 2, 0)
                uni = np.ones(n_km) / np.linalg.norm(np.ones(n_km))
                if normw == 0:
                    w = uni
                else:
                    w = w / normw

                for km in km_list:
                    kc = KernelCenterer()
                    train_km = km[np.ix_(train_ind, train_ind)]
                    test_km = km[np.ix_(test_ind, train_ind)]
                    # Center the train and test kernels with statistics
                    # fitted on the training kernel.
                    kc.fit(train_km)
                    train_km_c = kc.transform(train_km)
                    test_km_c = kc.transform(test_km)
                    train_km_list.append(train_km_c)
                    test_km_list.append(test_km_c)

                # Weighted sum of the centered kernels.
                train_ckm = np.zeros((n_train,n_train))
                for t in range(n_km):
                    train_ckm = train_ckm + w[t]*train_km_list[t]
                test_ckm = np.zeros(test_km_list[0].shape)
                for t in range(n_km):
                    test_ckm = test_ckm + w[t]*test_km_list[t]
                
                # Choose C by grid search on the training part, then refit
                # an SVC with the best C and predict the held-out fold.
                # NOTE(review): StratifiedShuffleSplit(y, n_iter=...) looks
                # like the legacy sklearn.cross_validation signature --
                # confirm against the installed sklearn version.
                C_range = [0.01,0.1,1,10,100]
                param_grid = dict(C=C_range)
                cv = StratifiedShuffleSplit(train_y,n_iter=5,test_size=0.2,random_state=42)
                grid = GridSearchCV(SVC(kernel='precomputed'), param_grid=param_grid, cv=cv)
                grid.fit(train_ckm, train_y)
                bestC = grid.best_params_['C']
                svm = SVC(kernel='precomputed', C=bestC)
                svm.fit(train_ckm, train_y)
                y_pred[test_ind] = svm.predict(test_ckm)

            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            np.savetxt(pred_f, y_pred, fmt="%d")

        elif data in image_datasets:
            # Fifteen precomputed linear kernels, kernel_linear_1..15.txt.
            y = np.loadtxt(km_dir+"y.txt",ndmin=2)
            p = np.sum(y==1,0)/float(y.shape[0])        
            y = y[:,p>t]
            linear_km_list = []
            for i in range(1,16):
                name = 'kernel_linear_%d.txt' % i
                km_f = km_dir+name
                km = np.loadtxt(km_f)
                # Each input kernel is normalized before use.
                linear_km_list.append(normalize_km(km))
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            pred = svm_mkl(linear_km_list, y, mkl, 5, data,data)
            np.savetxt(pred_f, pred, fmt="%d")

        elif data == 'SPAMBASE':
            # RBF kernels at several widths on standardized, row-normalized
            # features.  No label filtering is applied in this branch.
            y = np.loadtxt(km_dir+"y.txt",ndmin=2)
            rbf_km_list = []
            gammas = [2**-9, 2**-8, 2**-7, 2**-6, 2**-5, 2**-4, 2**-3]
            X = np.loadtxt(km_dir+"x.txt")
            scaler = preprocessing.StandardScaler().fit(X)
            X = scaler.transform(X)
            X = preprocessing.normalize(X)
            for gamma in gammas:
                km = rbf_kernel(X, gamma=gamma)
                rbf_km_list.append(km)
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            pred = svm_mkl(rbf_km_list, y, mkl, 5, data,data)
            np.savetxt(pred_f, pred, fmt="%d")

        else:
            # Any other dataset: RBF kernels at several widths on
            # standardized, row-normalized features, with label filtering.
            rbf_km_list = []
            gammas = [2**-13,2**-11,2**-9,2**-7,2**-5,2**-3,2**-1,2**1,2**3]
            X = np.loadtxt(km_dir+"x.txt")
            scaler = preprocessing.StandardScaler().fit(X)
            X = scaler.transform(X)
            X = preprocessing.normalize(X)
            # NOTE(review): loaded without ndmin=2, unlike the other
            # branches; a single-column label file would load as 1-D and the
            # column indexing below would fail -- confirm.
            y = np.loadtxt(km_dir+"y.txt")
            p = np.sum(y==1,0)/float(y.shape[0])        
            y = y[:,p>t]
            for gamma in gammas:
                km = rbf_kernel(X, gamma=gamma)
                # NOTE(review): the original comment here said the input
                # kernel is normalized, but km is appended as computed.
                rbf_km_list.append(km)
            pred_f = "../svm_result/pred/%s_cvpred_%s.txt" % (data, mkl)
            pred = svm_mkl(rbf_km_list, y, mkl, 5, data,data)
            np.savetxt(pred_f, pred, fmt="%d")
Esempio n. 12
0
    plt.plot(nComponents,kpcaldaScores,lw=3)

    plt.xlim(1,np.amax(nComponents))
    plt.title('kPCA accuracy')
    plt.xlabel('Number of components')
    plt.ylabel('accuracy')
    plt.xlim([500,1500])
    plt.legend (['LDA'],loc='lower right')
    plt.grid(True)    

if(0):
    # K-PCA second round
    ktrain = pair.rbf_kernel(Xtrain,Xtrain,gamma)
    ktest = pair.rbf_kernel(Xtest,Xtrain,gamma)
    kcent = KernelCenterer()
    kcent.fit(ktrain)
    ktrain = kcent.transform(ktrain)
    ktest = kcent.transform(ktest)
    
    kpca = PCA()
    kpca.fit_transform(ktrain)
    cumvarkPCA2 = np.cumsum(kpca.explained_variance_ratio_[0:220])
    
    # Calculate classifiation scores for each component
    nComponents = np.arange(1,nFeatures)
    kpcaScores2 = np.zeros((5,np.alen(nComponents)))
    for i,n in enumerate(nComponents):   
        kpca2 = PCA(n_components=n)
        kpca2.fit(ktrain)
        XtrainT = kpca2.transform(ktrain)
        XtestT = kpca2.transform(ktest)