def pca_pipeline(matrix, num_of_pcs):
    # PCA: compute the top principal components of the data matrix.
    print("Performing PCA...")
    matrix, eigvalues, eigvectors = pca.compute_pcs(matrix, num_of_pcs)

    # Project the data onto the principal components.
    print("Projecting onto the PCs...")
    scores_set = pca.scores(matrix, eigvectors)

    print("Plotting by the PCs...")
    pca.plot(scores_set, num_of_pcs, PIPELINE_NAMES[-1], PIPELINE_IDS[-1])

    # Use the elbow method over 1..10 clusters, then ask the user for K.
    print("Elbow Method for choosing K clusters...")
    X = elbow_method(scores_set, num_of_pcs, 1, 11)
    num_of_clusters = int(input("Number of clusters? "))

    # Cluster the PC scores and inspect the resulting groups.
    print("K-Means clustering...")
    labels = kmeans(X, num_of_clusters)

    print("Performing a qualitative analysis...")
    qualitative_analysis(labels, num_of_clusters)

    return X
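# A minimal usage sketch of the pipeline above. `load_dataset` is a hypothetical
# loader used only for illustration; `pca`, `PIPELINE_NAMES`, `PIPELINE_IDS`,
# `elbow_method`, `kmeans`, and `qualitative_analysis` are assumed to be defined
# elsewhere in the project, exactly as pca_pipeline expects.
if __name__ == "__main__":
    data_matrix = load_dataset()  # hypothetical: any (n_samples, n_features) array
    scores = pca_pipeline(data_matrix, num_of_pcs=3)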
import itertools as it

import pca
from scipy.sparse import random as sparse_random
from sklearn.datasets import load_iris


def test_pca():
    ######## TEST 1 #########
    # Fit and plot every combination of n_components / row_labels / col_labels.
    X = load_iris().data
    labels = load_iris().feature_names
    y = load_iris().target
    param_grid = {
        'n_components': [None, 0.01, 1, 0.95, 2, 100000000000],
        'row_labels': [None, [], y],
        'col_labels': [None, [], labels],
    }
    allNames = param_grid.keys()
    combinations = list(it.product(*(param_grid[Name] for Name in allNames)))
    for combination in combinations:
        model = pca.fit(X, n_components=combination[0],
                        row_labels=combination[1], col_labels=combination[2])
        ax = pca.plot(model)
        # plt.close('all')
        ax = pca.biplot(model)
        # plt.close('all')
        ax = pca.biplot3d(model)
        # plt.close('all')

    ######## TEST 2 #########
    # Sparse input.
    X = sparse_random(100, 1000, density=0.01, format='csr', random_state=42)
    model = pca.fit(X)
    ax = pca.plot(model)
    # plt.close('all')
    ax = pca.biplot(model)
    # plt.close('all')
    ax = pca.biplot3d(model)
    # plt.close('all')

    ######## TEST 3 #########
    # Dense input with and without labels, different n_components settings,
    # and pca.norm with excluded PCs.
    X = load_iris().data
    labels = load_iris().feature_names
    y = load_iris().target

    model = pca.fit(X)
    ax = pca.plot(model)
    # plt.close('all')
    ax = pca.biplot(model)
    # plt.close('all')
    ax = pca.biplot3d(model)
    # plt.close('all')

    model = pca.fit(X, row_labels=y, col_labels=labels)
    fig = pca.biplot(model)
    # plt.close('all')
    fig = pca.biplot3d(model)
    # plt.close('all')

    model = pca.fit(X, n_components=0.95)
    ax = pca.plot(model)
    # plt.close('all')
    ax = pca.biplot(model)
    # plt.close('all')

    model = pca.fit(X, n_components=2)
    ax = pca.plot(model)
    # plt.close('all')
    ax = pca.biplot(model)
    # plt.close('all')

    Xnorm = pca.norm(X, pcexclude=[1, 2])
    model = pca.fit(Xnorm, row_labels=y, col_labels=labels)
    ax = pca.biplot(model)
    # plt.close('all')
    ax = pca.plot(model)
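# A tiny stdlib-only sketch of the grid-expansion pattern used in TEST 1:
# itertools.product takes one iterable per parameter and yields every
# combination, so this toy 2 x 2 x 2 grid produces 8 tuples. The values here
# are placeholders and do not call the pca API.
import itertools as it

toy_grid = {'n_components': [None, 2], 'row_labels': [None, []], 'col_labels': [None, []]}
names = list(toy_grid.keys())
for combo in it.product(*(toy_grid[name] for name in names)):
    print(dict(zip(names, combo)))  # e.g. {'n_components': None, 'row_labels': None, 'col_labels': None}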
import matplotlib.pyplot as plt
import numpy as np
from mlxtend.data import loadlocal_mnist

from pca import PCA, plot, reconstruct

k = 235

# Load the raw MNIST training set and keep only the images of digit 0.
images, labels = loadlocal_mnist(images_path='./Data/train-images-idx3-ubyte',
                                 labels_path='./Data/train-labels-idx1-ubyte')
mat = images[labels == 0].astype('float64')

# Project onto the top k principal components and reconstruct the images.
pca_mat, mean, k_evectors = PCA(k, mat, use_perc=False)
recons_mat = reconstruct(pca_mat, mean, k_evectors)

# Compare an original image against its reconstruction.
plot(mat[0], 'Original')
plot(recons_mat[0], 'Reconstructed')
plt.show()
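# A small follow-up sketch: quantify how faithful the k-component reconstruction
# is with a per-pixel mean squared error. This only uses numpy on the arrays
# produced above and assumes reconstruct() returns an array with the same shape
# as mat.
mse = np.mean((mat - recons_mat) ** 2)
print(f"Mean squared reconstruction error with k={k} components: {mse:.2f}")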
import pca

# Create a PCAPlot instance and call plot() with and without labels.
pcaplot = pca.PCAPlot()
pcaplot.plot(["Mike", "Johanna"])
pcaplot.plot()
# %%
import pca
from sklearn.datasets import load_iris

# NOTE: the opening of this snippet mirrors test_pca above (Iris data and the
# same parameter grid).
X = load_iris().data
labels = load_iris().feature_names
y = load_iris().target

param_grid = {
    'n_components': [None, 0.01, 1, 0.95, 2, 100000000000],
    'row_labels': [None, [], y],
    'col_labels': [None, [], labels],
}

import itertools as it
allNames = param_grid.keys()
combinations = it.product(*(param_grid[Name] for Name in allNames))
combinations = list(combinations)

# %%
for combination in combinations:
    model = pca.fit(X, n_components=combination[0],
                    row_labels=combination[1], col_labels=combination[2])
    ax = pca.plot(model)
    ax = pca.biplot(model)
    ax = pca.biplot3d(model)

# %%
import pca
from scipy.sparse import random as sparse_random

X = sparse_random(100, 1000, density=0.01, format='csr', random_state=42)
model = pca.fit(X)
ax = pca.plot(model)
ax = pca.biplot(model)
ax = pca.biplot3d(model)

# %%
model = pca.fit(X)
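# %%
# Optional housekeeping sketch: the cells above open a new figure for every
# plot, biplot, and biplot3d call, so closing the accumulated figures keeps
# matplotlib's memory use (and its "more than 20 figures" warning) in check.
import matplotlib.pyplot as plt

plt.close('all')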