def test_fail_array_fit():
    pca = KPCA(n_components=2)
    with pytest.raises(ValueError):
        pca.fit(X1[1])
def test_reproj_2():
    pca = KPCA(n_components=2)
    pca.fit(X1)
    exp = pca.transform(X1[1, None])
    assert_almost_equal(pca.X_projected_[1, None], exp, decimal=2)
def test_proj():
    pca = KPCA(n_components=2)
    pca.fit(X1[:2])
    # Expected entries are +/-0.71 ~ +/-1/sqrt(2), consistent with the
    # unit-norm eigenvectors of the centered 2x2 kernel matrix that two
    # training samples produce.
    exp = np.array([[-0.71, -0.71],
                    [0.71, -0.71]])
    assert_almost_equal(pca.X_projected_, exp, decimal=2)
def test_default_2components():
    pca = KPCA(n_components=2)
    pca.fit(X1)
    assert pca.X_projected_.shape == (X1.shape[0], 2)
# In pyplot, alpha sets opacity.
plt.scatter(X[y == 0, 0], X[y == 0, 1], color='red', alpha=0.5)
plt.scatter(X[y == 1, 0], X[y == 1, 1], color='blue', alpha=0.5)
plt.title('Concentric circles')
plt.ylabel('y coordinate')
plt.xlabel('x coordinate')
# plt.show()
plt.savefig('../figs/tutorial/mlxtendex2_1.png')
plt.close()

from mlxtend.data import iris_data
from mlxtend.preprocessing import standardize
from mlxtend.feature_extraction import RBFKernelPCA as KPCA

# 2-component RBF kernel PCA
kpca = KPCA(gamma=15.0, n_components=2)

# Fit X with the KPCA parameters specified above.
kpca.fit(X)

# X projected onto the new feature space.
X_kpca = kpca.X_projected_

plt.scatter(X_kpca[y == 0, 0], X_kpca[y == 0, 1],
            color='red', marker='o', alpha=0.5)
plt.scatter(X_kpca[y == 1, 0], X_kpca[y == 1, 1],
            color='blue', marker='^', alpha=0.5)
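# New, unseen points can also be mapped into the learned kernel space with
# transform(), the same method the tests in this module exercise. A minimal
# sketch; the slice X[:5] is just an illustrative stand-in for out-of-sample
# data.
X_new_proj = kpca.transform(X[:5])
print(X_new_proj.shape)  # (5, 2): five points in the 2-component space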
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score

# Baseline: extra trees on the raw features.
et = ExtraTreesClassifier(n_estimators=300, max_depth=None,
                          random_state=0, verbose=5)
scores = cross_val_score(et, X, y, scoring='f1_micro', cv=5, verbose=5)
print(scores.mean())  # ~32% with max_depth=None, n_estimators=300

# Linear PCA
from mlxtend.feature_extraction import PrincipalComponentAnalysis as PCA

pca = PCA(n_components=700)
X_pca = pca.fit(X).transform(X)
et = ExtraTreesClassifier(n_estimators=500, max_depth=None,
                          random_state=0, verbose=5)
scores = cross_val_score(et, X_pca, y, scoring='f1_micro', cv=5, verbose=5)
print(scores.mean())

# RBF kernel PCA
from mlxtend.feature_extraction import RBFKernelPCA as KPCA

kpca = KPCA(gamma=1.0, n_components=700)
X_kpca = kpca.fit(X).transform(X)
et = ExtraTreesClassifier(n_estimators=500, max_depth=None,
                          random_state=0, verbose=5)
scores = cross_val_score(et, X_kpca, y, scoring='f1_micro', cv=5, verbose=5)
print(scores.mean())
def test_fail_array_transform():
    pca = KPCA(n_components=2)
    pca.fit(X1)
    with pytest.raises(ValueError):
        pca.transform(X1[1])
# plt.show()
plt.savefig('../figs/tutorial/mlxtendex1_2.png')
plt.close()

# This shows that linear PCA cannot produce a subspace in which the data are
# linearly separable. PCA is an unsupervised method, so the input data are
# unlabeled.

# Radial basis function (RBF) kernel PCA (KPCA)
from mlxtend.data import iris_data
from mlxtend.preprocessing import standardize
from mlxtend.feature_extraction import RBFKernelPCA as KPCA

# Specify a 2-component KPCA. The choice of gamma is dataset-dependent and is
# normally found via hyperparameter search methods such as grid search (a
# minimal tuning sketch follows after this block); the value used here is the
# one the tutorial's author reports as giving 'good' results.
kpca = KPCA(gamma=15.0, n_components=2)

# Fit X with the KPCA specification above.
kpca.fit(X)

# Project X onto the new feature space (represented by 'g' in the associated
# notes): the RBF kernel implicitly maps the data into a higher-dimensional
# space, and the projection keeps the top n_components directions of it.
X_kpca = kpca.X_projected_

# Plot the moons again, with the kernel-projected X.
plt.scatter(X_kpca[y == 0, 0], X_kpca[y == 0, 1],
            color='red', marker='o', alpha=0.5)
plt.scatter(X_kpca[y == 1, 0], X_kpca[y == 1, 1],
            color='blue', marker='^', alpha=0.5)
plt.title('First 2 principal components after RBF Kernel PCA')
plt.xlabel('PC1')
plt.ylabel('PC2')
# plt.show()
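# The gamma above comes from the tutorial; in practice it must be tuned per
# dataset. A minimal, illustrative sketch of scanning candidate gamma values:
# the gamma grid and the choice of LogisticRegression as the downstream
# scorer are assumptions for illustration, not part of the tutorial.
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import cross_val_score

for gamma in [0.1, 1.0, 5.0, 15.0, 50.0]:
    candidate = KPCA(gamma=gamma, n_components=2)
    candidate.fit(X)
    # A good gamma is one whose projection makes the classes linearly
    # separable, so score each projection with a linear classifier.
    scores = cross_val_score(LogisticRegression(),
                             candidate.X_projected_, y, cv=5)
    print('gamma=%5.1f  mean CV accuracy=%.3f' % (gamma, scores.mean()))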
def test_default_components():
    pca = KPCA()
    pca.fit(X1)
    assert pca.X_projected_.shape == X1.shape
def test_default_0components():
    with pytest.raises(AttributeError):
        pca = KPCA(n_components=0)
        pca.fit(X1)