def apply_pca(X):
    """Project X onto its first two principal components and print a report.

    Parameters
    ----------
    X : ndarray, shape (n_samples, n_features)
        Data matrix to project.

    Returns
    -------
    ndarray, shape (n_samples, 2)
        X projected onto the top-2 eigenvector basis.
    """
    eigenvectors, eigenvalues = compute_pca(X)

    # Stack first two eigenvectors.
    # NOTE(review): this selects eigenvectors[0] / eigenvectors[1] (ROWS),
    # while the sibling apply_pca uses EVecs[:, :2] (COLUMNS). Confirm which
    # axis compute_pca returns eigenvectors on — one of the two is wrong.
    E_2 = np.column_stack((eigenvectors[0], eigenvectors[1]))
    X_PCA = np.dot(X, E_2)

    # Report data — print() keeps this Python 3 compatible and consistent
    # with the other apply_pca variant in this file.
    print("Largest two eigenvalues:")
    print(eigenvalues[0])
    print(eigenvalues[1])
    print("\n")
    print("First five rows of E_2:")
    print(E_2[0:5, :])
    print("\n")
    print("First five rows of X_PCA:")
    print(X_PCA[0:5, :])
    return X_PCA
def apply_pca(matrix):
    """Project `matrix` onto its first two principal components.

    Prints the top-2 eigenvalues and the first five rows of both the
    eigenvector basis and the projection, then returns the projection.
    """
    eigenvectors, eigenvalues = compute_pca(matrix)

    # Top-2 eigenvectors, taken as the first two columns.
    basis = eigenvectors[:, :2]
    projection = matrix.dot(basis)

    print("2 eigenvalues:", eigenvalues[:2])
    print("5 rows of E2:\n", basis[:5, :])
    print("5 rows of X_PCA:\n", projection[:5, :])
    return projection
def show_cumulative_variance():
    """Plot the cumulative variance of the SVHN training features' PCs.

    Loads 'svhn.mat', runs compute_pca on the training features, and
    scatter-plots the cumulative sum of the returned eigenvalues.
    """
    import scipy.io  # local import mirrors the original scoping

    data = scipy.io.loadmat('svhn.mat')
    train_data = np.array(data['train_features'])

    # compute_pca's second return value holds the eigenvalues; their
    # cumulative sum is the (unnormalised) cumulative variance.
    cumsum = np.cumsum(compute_pca(train_data)[1])

    # range() already supplies the x positions — no list copy needed.
    plt.plot(range(len(cumsum)), cumsum, 'ro')
    plt.xlabel("Principal components")
    plt.ylabel("Cumulative variance")  # fixed label typo ("Comulative")
    plt.show()
def PCA(NormMatrix, file2D, file3D):
    """Run PCA on the transposed normalised matrix and draw 2D/3D plots.

    Parameters
    ----------
    NormMatrix : ndarray
        Normalised data matrix; transposed before PCA so samples are rows.
    file2D, file3D : str
        Output targets for the 2D and 3D PCA plots.
    """
    transposed_matrix = NormMatrix.T
    proj, pve, pcs = compute_pca(transposed_matrix, scaled=True, logged=False,
                                 kernel=None, kernel_kwargs=None,
                                 variant='pca', pf=1)

    # Shape report — print() keeps this Python 3 compatible.
    print('\nTransposed Matrix =', np.shape(transposed_matrix))
    print('\nProj =', np.shape(proj))
    print('\nPVE =', np.shape(pve))
    print('\nPCS =', np.shape(pcs))
    print(proj)

    # Experiment labels for the plot legends.
    experiment = [
        "Human Liver Carcinoma",
        "Heatshock",
        "UV",
        "Hepatocyte GF",
        "Interferons",
        "Brain",
    ]
    plot_pca(file2D, proj, pcs=(0, 1), labels=experiment, label_points=True,
             levels=None, colors=None, legend=True, s=100)
    plot_pca3D(file3D, proj, pcs=(0, 1, 2), labels=experiment,
               label_points=True, levels=None, colors=None, legend=True,
               s=100)
# Reduce dimensionality of test data and center it using the training mean T = test_x - mu T = T.dot(EVecs) probabilities = np.zeros((classes, test_y.shape[0])) # Find posterior probabilities for every class for k in range(classes): probabilities[k, :] = gaussianClassifier(mu_hat[k, :], sigma_hat[:, :, k], T) # Find predicted class labels for the test data and correct it so it starts from 1 labels_predicted = np.argmax(probabilities, axis=0) + 1 confusion_matrix = MyConfusionMatrix(test_y, labels_predicted) return confusion_matrix EVecs, EVals = compute_pca(train_x) confusion_matrix = trainAndTest(train_x, train_y, test_x, test_y, EVecs, 100) scipy.io.savemat('confmat_d100.mat', {'Confusion Matrix': confusion_matrix}) classification_rate = np.diag(confusion_matrix / confusion_matrix.sum(axis=0)) for float in classification_rate: print '{:.1%}'.format(float) # Pretty printing for the confusion matrix using Pandas. Used only for the report. # y_actual = pd.Index(np.arange(1,6,1), name="Actual") # y_pred = pd.Index(np.arange(1,6,1), name="Predicted") # df = pd.DataFrame(data = confusion_matrix.astype(int), index = y_actual, columns=y_pred) # print df
import scipy.io
import compute_pca as cp
import numpy as np
from sklearn import decomposition
from matplotlib.mlab import PCA  # NOTE(review): unused; mlab.PCA was removed in matplotlib >= 3.1

# Load SVHN training features and project them onto the top-2
# eigenvector basis produced by our own PCA implementation.
data = scipy.io.loadmat('svhn.mat')
X = data['train_features']
vec, val = cp.compute_pca(X)
E2 = vec[:, 0:2]        # first two eigenvectors (as columns)
egvals = val[0:2]       # their eigenvalues
X_PCA = np.dot(X, E2)

# print() keeps the script Python 3 compatible.
print(X.shape)
print(E2.shape)

v1 = E2[:, 0]
v2 = E2[:, 1]

# Cross-check our projection against scikit-learn's PCA on the
# transposed data; the first few entries should agree up to sign.
pca = decomposition.PCA(n_components=2)
pca.fit(X.T)
Y = pca.transform(X.T)
print(Y[0:2, 0:2])
print(X_PCA[0:2, 0:2])
import scipy.io
import numpy as np
import compute_pca
import matplotlib.pyplot as plt

# Scatter-plot the cumulative variance of the SVHN training features'
# principal components.
data = scipy.io.loadmat('svhn.mat')
X = data['train_features']
eigenvectors, eigenvalues = compute_pca.compute_pca(X)

cs = np.cumsum(eigenvalues)
print(cs)  # print() keeps the script Python 3 compatible

# x positions 1..k, derived from the data instead of hard-coding 100 so
# the plot works for any number of principal components.
x = range(1, len(cs) + 1)
y = cs

fig = plt.figure()
ax = fig.add_subplot(1, 1, 1)
ax.set_title('Cumulative variance')
ax.scatter(x, y)
plt.show()