def set_kpca_obj(pars, data, sntype, type_number):
    """
    Set kpca object based on cross-validation results.

    input: pars, dict
               output from read_hyperpar
           data, array
               output from read_matrix
           sntype, list
               output from read_matrix
           type_number, dict
               dictionary to translate types between raw data and
               final classification
               keywords -> final classificaton
               elements values -> identifiers in raw data

    output: obj_kpca, KernelPCA obj
                tailored with cross-validation results
            spec_matrix, array
                low dimension spectroscopic matrix
            labels, list
                classes as defined in raw data files
    """
    from sklearn.decomposition import KernelPCA
    import numpy as np

    # Pass the cross-validated hyperparameters directly to the
    # constructor.  (The original assigned them one-by-one as attributes
    # after construction; that happens to work for sklearn estimators,
    # since fit() reads self.<param>, but the constructor form is the
    # documented, less error-prone idiom.)
    obj_kpca = KernelPCA(
        n_components=pars['n_components'],
        kernel=pars['kernel'],
        gamma=pars['gamma'],
        degree=pars['degree'],
        coef0=pars['coef0'],
        kernel_params=pars['kernel_params'],
        alpha=pars['alpha'],
        fit_inverse_transform=pars['fit_inverse_transform'],
        eigen_solver=pars['eigen_solver'],
        tol=pars['tol'],
        max_iter=pars['max_iter'],
        remove_zero_eig=pars['remove_zero_eig'],
    )

    # project the spectroscopic data into the low-dimension kPCA space
    spec_matrix = obj_kpca.fit_transform(data)

    # construct label vector: for each raw type, collect every final
    # classification key whose raw-identifier collection contains it
    # (same nested-loop order as before, written as a comprehension)
    labels = np.array([
        classes
        for elem in sntype
        for classes, raw_ids in type_number.items()
        if elem in raw_ids
    ])

    return obj_kpca, spec_matrix, labels
# Report held-out accuracy of the SVM fitted on the LDA projection
# (y_test / y_test_pred / accuracy_score come from earlier in the file).
print("SVM classifier's test accuracy on LDA:")
print(accuracy_score(y_test, y_test_pred))

### kPCA & LR
print('\n' * 3)
print("<3.1> refit logistic regression classifier on the kPCA transformed datasets.")
from sklearn.decomposition import KernelPCA

lr = LogisticRegression()
# gamma=15 here is only an initial value; it is overwritten on every
# sweep iteration below before fit_transform is called.
kpca = KernelPCA(n_components=2, kernel='rbf', gamma=15)
parameters = [0.005, 0.010, 0.015, 0.020, 0.025]
# Sweep the RBF-kernel gamma; iterate the list directly instead of the
# original `for i in range(5)` index loop (same order, no hard-coded 5).
for gamma in parameters:
    kpca.gamma = gamma
    # fit the kPCA projection on the (standardized) training set only,
    # then apply the learned projection to the test set
    X_train_kpca = kpca.fit_transform(X_train_std)
    X_test_kpca = kpca.transform(X_test_std)
    lr.fit(X_train_kpca, y_train)
    y_train_pred = lr.predict(X_train_kpca)
    y_test_pred = lr.predict(X_test_kpca)
    print("LR classifier's train accuracy on kPCA:")
    print(accuracy_score(y_train, y_train_pred))
    print("LR classifier's test accuracy on kPCA:")
    print(accuracy_score(y_test, y_test_pred))

### kPCA & SVM
print('\n' * 3)
print("<3.2> refit SVM regression classifier on the kPCA transformed datasets.")
from sklearn.svm import SVC

svm = SVC(kernel='linear', C=1.0, random_state=1)
#SVM svm2 = SVC() lda = LDA(n_components=2) X_train_lda = lda.fit_transform(X_train_std, y_train) X_test_lda = lda.transform(X_test_std) svm2.fit(X_train_lda, y_train) print('SVM_lda_train accuracy score: %.5f' % (svm2.score(X_train_lda, y_train))) print('SVM_lda_test accuracy score: %.5f' % (svm2.score(X_test_lda, y_test))) #KPCA #LogisticRegression gamma_space = np.logspace(-2, 0, 4) scikit_kpca = KernelPCA(n_components=13, kernel='rbf') for gamma in gamma_space: scikit_kpca.gamma = gamma X_train_kpca = scikit_kpca.fit_transform(X_train_std) X_test_kpca = scikit_kpca.transform(X_test_std) log3 = LogisticRegression() log3.fit(X_train_kpca, y_train) print('(gamma:' + str(gamma) + ') LR_kpca_train accuracy score: %.5f' % (log3.score(X_train_kpca, y_train))) print('(gamma:' + str(gamma) + ') LR_kpca_test accuracy score: %.5f' % (log3.score(X_test_kpca, y_test))) #SVM gamma_space = np.logspace(-2, 0, 4) scikit_kpca = KernelPCA(n_components=13, kernel='rbf') for gamma in gamma_space: scikit_kpca.gamma = gamma X_train_kpca = scikit_kpca.fit_transform(X_train_std)