def test_fail_array_fit():
    pca = KPCA(n_components=2)
    with pytest.raises(ValueError):
        pca.fit(X1[1])
def test_fail_array_transform():
    pca = KPCA(n_components=2)
    pca.fit(X1)
    with pytest.raises(ValueError):
        pca.transform(X1[1])
def test_proj():
    pca = KPCA(n_components=2)
    pca.fit(X1[:2])
    exp = np.array([[-0.71, -0.71],
                    [0.71, -0.71]])
    assert_almost_equal(pca.X_projected_, exp, decimal=2)
def test_reproj_2():
    pca = KPCA(n_components=2)
    pca.fit(X1)
    exp = pca.transform(X1[1, None])
    assert_almost_equal(pca.X_projected_[1, None], exp, decimal=2)
def test_default_2components():
    pca = KPCA(n_components=2)
    pca.fit(X1)
    assert pca.X_projected_.shape == (X1.shape[0], 2)
def test_default_0components():
    with pytest.raises(AttributeError):
        pca = KPCA(n_components=0)
        pca.fit(X1)
#plt.show()
plt.savefig('../figs/tutorial/mlxtendex1_2.png')
plt.close()

# This shows that linear PCA is unable to generate a subspace in which the
# data are linearly separable. PCA is an unsupervised method, so the input
# data are unlabeled.

# Radial basis function (RBF) kernel PCA (KPCA)
from mlxtend.data import iris_data
from mlxtend.preprocessing import standardize
from mlxtend.feature_extraction import RBFKernelPCA as KPCA

# Specify a 2-component KPCA. The choice of gamma depends on the dataset and
# is normally obtained via hyperparameter tuning such as grid search; the
# value used here is the one the tutorial's author reports as giving 'good'
# results.
kpca = KPCA(gamma=15.0, n_components=2)

# Fit X with the above KPCA specification
kpca.fit(X)

# Projections of X onto the leading components of the kernel-induced
# (implicitly higher-dimensional) feature space (represented by 'g' in the
# associated notes)
X_kpca = kpca.X_projected_

# Plot the moons data, now using the kernel-projected X
plt.scatter(X_kpca[y == 0, 0], X_kpca[y == 0, 1],
            color='red', marker='o', alpha=0.5)
plt.scatter(X_kpca[y == 1, 0], X_kpca[y == 1, 1],
            color='blue', marker='^', alpha=0.5)
plt.title('First 2 principal components after RBF Kernel PCA')
plt.xlabel('PC1')
plt.ylabel('PC2')
#plt.show()
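# The comment above notes that gamma is normally tuned per dataset. A
# hypothetical manual search (a sketch, not the tutorial's method): fit a
# KPCA per candidate gamma and cross-validate a simple classifier on the
# resulting projection. The candidate grid and the LogisticRegression
# scorer are assumptions, not part of the original tutorial.
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

best_gamma, best_score = None, -np.inf
for g in [0.1, 1.0, 5.0, 15.0, 50.0]:  # hypothetical candidate grid
    kpca_g = KPCA(gamma=g, n_components=2)
    kpca_g.fit(X)
    score = cross_val_score(LogisticRegression(),
                            kpca_g.X_projected_, y, cv=5).mean()
    if score > best_score:
        best_gamma, best_score = g, score
print(best_gamma, best_score)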
def test_default_components():
    pca = KPCA()
    pca.fit(X1)
    assert pca.X_projected_.shape == X1.shape
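# For reference, the tests above assume module-level imports and an X1
# fixture roughly like the sketch below. The array values are placeholders:
# the shape-based tests work with any small 2-D float array, but the numeric
# expectations in test_proj depend on the actual dataset used in the project.
import numpy as np
import pytest
from numpy.testing import assert_almost_equal
from mlxtend.feature_extraction import RBFKernelPCA as KPCA

X1 = np.array([[0.0, 1.0],   # placeholder data
               [2.0, 3.0],
               [4.0, 5.0],
               [6.0, 7.0]])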
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score

et = ExtraTreesClassifier(n_estimators=300, max_depth=None,
                          random_state=0, verbose=5)
scores = cross_val_score(et, X, y, scoring='f1_micro', cv=5, verbose=5)
print(scores.mean())  # ~32% with max_depth=None, n_estimators=300

# Linear PCA
from mlxtend.feature_extraction import PrincipalComponentAnalysis as PCA

pca = PCA(n_components=700)
X_pca = pca.fit(X).transform(X)
et = ExtraTreesClassifier(n_estimators=500, max_depth=None,
                          random_state=0, verbose=5)
scores = cross_val_score(et, X_pca, y, scoring='f1_micro', cv=5, verbose=5)
print(scores.mean())

# Kernel PCA
from mlxtend.feature_extraction import RBFKernelPCA as KPCA

kpca = KPCA(gamma=1.0, n_components=700)
X_kpca = kpca.fit(X).transform(X)
et = ExtraTreesClassifier(n_estimators=500, max_depth=None,
                          random_state=0, verbose=5)
scores = cross_val_score(et, X_kpca, y, scoring='f1_micro', cv=5, verbose=5)
print(scores.mean())
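# Note: both PCA variants above are fit on all of X before cross-validation,
# which leaks information from the held-out folds into the features. A sketch
# of a leakage-free variant using scikit-learn's own PCA (swapped in here
# because Pipeline requires the scikit-learn estimator API):
from sklearn.decomposition import PCA as SkPCA
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline

pipe = Pipeline([
    ('pca', SkPCA(n_components=700)),  # refit inside each training fold
    ('et', ExtraTreesClassifier(n_estimators=500, random_state=0)),
])
scores = cross_val_score(pipe, X, y, scoring='f1_micro', cv=5)
print(scores.mean())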
    marker='^',  # triangle marker
    alpha=0.5,
)
plt.title('BMI vs glucose by sex after linear PCA')
plt.ylabel('Serum glucose concentration')
plt.xlabel('BMI')
plt.legend([sex1, sex2], ['Sex 1', 'Sex 2'])
#plt.show()
plt.savefig('../../figs/bivariate/subsetkpca1_3')
plt.close()
'''

# KPCA with an arbitrary gamma
gamma = 500
kpca = KPCA(gamma=gamma, n_components=2)

# Fit X with the above KPCA specification
kpca.fit(X)

# Projections of X onto the leading components of the kernel-induced feature
# space (represented by 'g' in the associated notes)
X_kpca = kpca.X_projected_

# Plot the projected data, point by point
for i in range(len(X)):
    if sex[i] == 1:
        # Sex 1: glucose vs BMI
        sex1 = plt.scatter(
            X_kpca[i][0],  # BMI
            X_kpca[i][1],  # glucose
            color='red',
            marker='o',  # circle marker
            alpha=0.5,
        )
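# As in the moons example earlier, the per-row loop can be replaced with
# boolean masks; a sketch assuming sex is (convertible to) a NumPy array with
# the two groups coded 1 and 2:
import numpy as np

sex = np.asarray(sex)
sex1 = plt.scatter(X_kpca[sex == 1, 0], X_kpca[sex == 1, 1],
                   color='red', marker='o', alpha=0.5)
sex2 = plt.scatter(X_kpca[sex == 2, 0], X_kpca[sex == 2, 1],
                   color='blue', marker='^', alpha=0.5)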