def test_fail_array_fit():
    """fit must reject a single 1-D sample row."""
    model = KPCA(n_components=2)
    with pytest.raises(ValueError):
        model.fit(X1[1])
def test_fail_array_transform():
    """transform must reject a single 1-D sample row after fitting."""
    model = KPCA(n_components=2)
    model.fit(X1)
    with pytest.raises(ValueError):
        model.transform(X1[1])
def test_proj():
    """Projecting the first two samples reproduces the known 2x2 result."""
    model = KPCA(n_components=2)
    model.fit(X1[:2])
    expected = np.array([[-0.71, -0.71], [0.71, -0.71]])
    assert_almost_equal(model.X_projected_, expected, decimal=2)
def test_reproj_2():
    """Re-transforming a training row matches its stored projection."""
    model = KPCA(n_components=2)
    model.fit(X1)
    reprojected = model.transform(X1[1, None])
    assert_almost_equal(model.X_projected_[1, None], reprojected, decimal=2)
def test_default_2components():
    """With n_components=2 the projection has shape (n_samples, 2)."""
    model = KPCA(n_components=2)
    model.fit(X1)
    n_samples = X1.shape[0]
    assert model.X_projected_.shape == (n_samples, 2)
def test_default_0components():
    """Requesting zero components is rejected during fit."""
    with pytest.raises(AttributeError):
        model = KPCA(n_components=0)
        model.fit(X1)
# Exemple #7
# 0
#plt.show()
plt.savefig('../figs/tutorial/mlxtendex1_2.png')
plt.close()

# The previous figure shows linear PCA is unable to generate a subspace
# suitable for linearly separating this data.
# PCA is an unsupervised method, so the input data is unlabeled.

# Radial basis function (RBF) kernel PCA (KPCA)
from mlxtend.data import iris_data
from mlxtend.preprocessing import standardize
from mlxtend.feature_extraction import RBFKernelPCA as KPCA

# Specify a 2-component kernel PCA. The gamma choice is dataset-dependent and
# is normally obtained via hyperparameter search (e.g. grid search);
# gamma=15.0 is the value the tutorial's author reports as giving 'good' results.
kpca = KPCA(gamma=15.0, n_components=2)
# Fit X with the KPCA specification above
kpca.fit(X)
# Projection of the training data onto the kernel principal components,
# computed during fit and stored on the estimator
X_kpca = kpca.X_projected_

# Plot the two classes again, this time using the kernel-projected X
plt.scatter(X_kpca[y==0, 0], X_kpca[y==0, 1],
            color='red', marker='o', alpha=0.5)
plt.scatter(X_kpca[y==1, 0], X_kpca[y==1, 1],
            color='blue', marker='^', alpha=0.5)

plt.title('First 2 principal components after RBF Kernel PCA')
plt.xlabel('PC1')
plt.ylabel('PC2')
#plt.show()
def test_default_components():
    """With no n_components, the projection keeps the input's shape."""
    model = KPCA()
    model.fit(X1)
    assert model.X_projected_.shape == X1.shape
def test_fail_array_transform():
    # NOTE(review): shares its name with an earlier test in this file, so this
    # later definition shadows the earlier one at import time. Unlike that
    # version there is no pytest.raises guard here: if transform rejects the
    # 1-D row X1[1], this test errors out instead of asserting the failure —
    # presumably the pytest.raises(ValueError) wrapper was lost; verify.
    pca = KPCA(n_components=2)
    pca.fit(X1)
    exp = pca.transform(X1[1])  # result unused; likely expected to raise
# Exemple #10
# 0
def test_fail_array_fit():
    # NOTE(review): duplicate of the earlier test of the same name, but
    # without the pytest.raises(ValueError) guard — fitting a single 1-D row
    # will raise uncaught here; presumably the guard was lost. Verify.
    pca = KPCA(n_components=2)
    pca.fit(X1[1])
# Exemple #11
# 0
def test_default_components():
    # NOTE(review): same name as an earlier test (the later def shadows it),
    # and unlike test_default_0components there is no pytest.raises guard
    # around the zero-component fit — presumably intended to assert failure;
    # verify against the original test suite.
    pca = KPCA(n_components=0)
    pca.fit(X1)
# Exemple #12
# 0
# Baseline: ExtraTrees on the raw features, 5-fold CV on micro-averaged F1.
from sklearn.model_selection import cross_val_score
et = ExtraTreesClassifier(n_estimators=300, max_depth=None, random_state=0, verbose=5)
scores = cross_val_score(et, X, y, scoring='f1_micro', cv=5, verbose=5)
print(scores.mean())  # was a Python-2 print statement (SyntaxError under py3)
#32% ,max depth=none,  n_est=300



# Linear PCA down to 700 components, then the same classifier / CV setup.
from mlxtend.feature_extraction import PrincipalComponentAnalysis as PCA
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.model_selection import cross_val_score
pca = PCA(n_components=700)
X_pca = pca.fit(X).transform(X)
et = ExtraTreesClassifier(n_estimators=500, max_depth=None, random_state=0, verbose=5)
scores = cross_val_score(et, X_pca, y, scoring='f1_micro', cv=5, verbose=5)
print(scores.mean())



# RBF-kernel PCA at the same dimensionality, evaluated the same way.
from mlxtend.feature_extraction import RBFKernelPCA as KPCA

kpca = KPCA(gamma=1.0, n_components=700)
X_kpca = kpca.fit(X).transform(X)  # single fit (the original fitted twice)
et = ExtraTreesClassifier(n_estimators=500, max_depth=None, random_state=0, verbose=5)
# BUG FIX: the original scored X_pca here, so the kernel projection computed
# above was never actually evaluated — score X_kpca instead.
scores = cross_val_score(et, X_kpca, y, scoring='f1_micro', cv=5, verbose=5)
print(scores.mean())


# Exemple #13
# 0
            marker='^',     #triangle marker
            alpha=0.5,
            )
plt.title('BMI vs glucose by sex after linear PCA')
plt.ylabel('Serum glucose concentration')
plt.xlabel('BMI')
plt.legend([sex1, sex2], ['Sex 1', 'Sex 2'])

#plt.show()
plt.savefig('../../figs/bivariate/subsetkpca1_3')
plt.close()
'''

# Kernel PCA with an arbitrary (untuned) gamma
gamma = 500
kpca = KPCA(gamma=gamma, n_components=2)
# Fit X with the KPCA specification above (gamma=500 was not tuned)
kpca.fit(X)
# Projection of the training data onto the first 2 kernel principal
# components, computed during fit and stored on the estimator
X_kpca = kpca.X_projected_

#Graph after kpca
#generate graph from matrix
for i in range(len(X)):
    if sex[i] == 1:
        #Sex 1 glucose v BMI
        sex1 = plt.scatter(
            X_kpca[i][0],  #bmi
            X_kpca[i][1],  #glucose
            color='red',
            marker='o',  #circle marker