def getCentVec(self, contextVecs):
    sample, rank, dim = contextVecs.shape
    contexts = np.reshape(contextVecs, (sample * rank, dim))
    pca = PCA(n_components=1)
    pca.fit(contexts)
    return pca.components_[0]
def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # TODO:
    D = self.construct_dist_graph(X, D)

    # If two points are disconnected (distance is Inf)
    # then set their distance to the maximum
    # distance in the graph, to encourage them to be far apart.
    D[np.isinf(D)] = D[~np.isinf(D)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z
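# Several of the `compress` methods in this collection rely on a small custom PCA
# class (a course-style `pca.py`) that exposes fit/compress/expand instead of
# scikit-learn's fit/transform. The sketch below is only a minimal illustration of
# what such a class might look like, assuming an SVD-based implementation; the
# actual `pca.py` behind these snippets may differ.
import numpy as np


class PCA:
    """Naive PCA via SVD of the centered data matrix."""

    def __init__(self, k):
        self.k = k  # number of principal components to keep

    def fit(self, X):
        # Center the data and take the top-k right singular vectors
        self.mu = np.mean(X, axis=0)
        Xc = X - self.mu
        U, s, Vt = np.linalg.svd(Xc, full_matrices=False)
        self.W = Vt[:self.k]  # (k, d) matrix of principal directions

    def compress(self, X):
        # Project onto the top-k principal directions
        return (X - self.mu) @ self.W.T

    def expand(self, Z):
        # Map low-dimensional points back to the original space
        return Z @ self.W + self.mu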
def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # Build the nearest-neighbour graph: keep edges to the nn closest points
    sorted_indices = np.argsort(D)
    G = np.zeros((n, n))
    for i in range(D.shape[0]):
        for j in range(self.nn + 1):
            G[i, sorted_indices[i, j]] = D[i, sorted_indices[i, j]]
            G[sorted_indices[i, j], i] = D[sorted_indices[i, j], i]

    # Geodesic distances via shortest paths on the graph
    dist = utils.dijkstra(G)
    dist[np.isinf(dist)] = dist[~np.isinf(dist)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, dist)
    Z = z.reshape(n, self.k)
    return Z
def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances (D is a symmetric matrix)
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    geoD = np.zeros((n, n))

    # Find the nn nearest neighbours of each point
    for i in range(n):
        sort = np.argsort(D[:, i])
        # the nn + 1 smallest indices that are not i
        neigh = np.setdiff1d(sort[0:self.nn + 1], i)
        for j in range(len(neigh)):
            t = neigh[j]
            geoD[i, t] = D[i, t]
            geoD[t, i] = D[t, i]

    D = utils.dijkstra(geoD)

    # For disconnected vertices (distance is Inf), set their distance to the
    # maximum distance in the graph to encourage them to be far apart.
    D[np.isinf(D)] = D[~np.isinf(D)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z
def getCxtSubspace(wl, dim, var_threshold=0.45):
    emb = []
    for word in wl:
        if word not in vecDict:
            print("non-exist:", word)
            continue
        wordEmbed = vecDict[word]
        emb.append(wordEmbed)
    emb = np.array(emb)

    # First pass: count how many components are needed to reach the variance threshold
    pca = PCA()
    pca.fit(emb)
    varList = pca.explained_variance_ratio_
    cand = 0
    varSum = 0
    for var in varList:
        varSum += var
        cand += 1
        if varSum >= var_threshold:
            break

    # Second pass: keep only that many components as the context subspace
    pca = PCA(n_components=cand)
    pca.fit(emb)
    top_embed = pca.components_
    print("dim:", len(top_embed.tolist()), cand)
    return top_embed.tolist()
def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # Construct nearest neighbour graph
    G = np.zeros([n, n])
    for i in range(n):
        neighbours = np.argsort(D[i])[:self.nn + 1]
        for j in neighbours:
            G[i, j] = D[i, j]
            G[j, i] = D[j, i]

    # Compute ISOMAP distances
    D = utils.dijkstra(G)

    # If two points are disconnected (distance is Inf)
    # then set their distance to the maximum
    # distance in the graph, to encourage them to be far apart.
    D[np.isinf(D)] = D[~np.isinf(D)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z
def test_pca(self):
    X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    pca = PCA(n_comp=2)
    pca.fit(X)
    self.assertTrue(
        np.allclose(pca.explained_variance, np.array([0.9924, 0.0075]), atol=1e-3))
def cross_validation(X, Y, folds=5, split_value=0.3, name="lda"):
    # Y = Y.reshape((len(Y), 1))
    # X = np.hstack((X, Y))
    # part = -1
    #
    # if split:
    #     part = split_value
    # else:
    #     part = int(np.math.ceil(len(X) / folds))
    # scores = []
    #
    # for i in range(folds):
    #     test = np.array(X[i * part: (i + 1) * part])
    #     test = [list(d) for d in test]
    #     train = [np.array(j) for j in X if list(j) not in test]
    #     test = np.array(test)
    #     train = np.array(train)
    #
    #     train_x, train_y = train[:, :-1], train[:, -1]
    #     test_x, test_y = test[:, :-1], test[:, -1]
    #
    #     print(train_x.shape)
    #     print(test_x.shape)

    scores = []
    for fold in range(folds):
        train_x, test_x, train_y, test_y = train_test_split(
            X, Y, shuffle=True, test_size=split_value)

        # Project the features with LDA or PCA before classification
        if name == "lda":
            lda = LDA()
            lda.fit(train_x, train_y)
            proj_train_x = lda.transform(train_x)
            proj_test_x = lda.transform(test_x)
        else:
            pca = PCA()
            pca.fit(train_x)
            proj_train_x = pca.transform(train_x)
            proj_test_x = pca.transform(test_x)

        # classifier
        lr = LogisticRegression(solver='saga', n_jobs=4)
        lr.fit(proj_train_x, train_y)
        score = lr.score(proj_test_x, test_y)
        scores.append(score)
        print("accuracy on fold ", fold, " : ", score)

    mean = np.mean(scores)
    std = np.std(scores)
    print("mean accuracy : ", mean)
    print("standard deviation : ", std)
    return mean, std, scores
def pcaSenEmb(sent_vecs, var_threshold=0.6):
    """
    output: basis of context space
    """
    pca = PCA()
    pca.fit(sent_vecs)
    var_list = pca.explained_variance_ratio_

    # Count how many components are needed to reach the variance threshold
    cand = 0
    var_sum = 0
    for var in var_list:
        var_sum += var
        cand += 1
        if var_sum >= var_threshold:
            break

    # Keep only those components as the basis of the context space
    basis = pca.components_[:cand]
    return basis
def compress(self, X):
    n = X.shape[0]
    k = self.k

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    # Initialize low-dimensional representation with PCA
    pca = PCA(k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, k)
    return Z
def pcaContexts(self, idxList, idx=-1, contextMatrix=None):
    '''
    input: context indices
    output: pca vectors
    '''
    vecs = self.vecMatrix[np.array(idxList)]
    # randIdx = np.random.randint(0, self.vocabSize, size=(1,), dtype='i')
    # vecs = self.vecMatrixNorm[randIdx]
    pca = PCA(n_components=self.pcaRank)
    pca.fit(vecs)
    contextVecs = pca.components_[0:self.pcaRank]
    if idx >= 0:
        contextMatrix[idx] = contextVecs
    del vecs
    return contextVecs, sum(pca.explained_variance_ratio_)
def pca_subspace(elements, embedding_matrix, vector_dim, mean_centering, numComponents, debugInfo):
    ferr = open("errors_pca_representation", "a+")
    flog = open("logs_pca_representation", "a+")

    if embedding_matrix.ndim == 1:
        # only one word in the sentence, do nothing (no PCA);
        # the vector space of the word itself is the subspace
        ferr.write("[No PCA]: Only a single element from " + " ".join(elements) +
                   " found in supplied embeddings for the document " + "_".join(debugInfo) + "\n")
        subspace = embedding_matrix
        singularValues = np.array([1.0])
        energyRetained = 1.0
    else:
        flog.write("Original NumComponents: " + str(numComponents) +
                   " NumElements: " + str(embedding_matrix.shape[0]) + "\t")
        numComponents = min(embedding_matrix.shape[0], embedding_matrix.shape[1], numComponents)
        flog.write("New NumComponents: " + str(numComponents) + "\n")
        pca = PCA(n_components=numComponents, mean_centering=mean_centering)
        try:
            pca.fit(embedding_matrix)
            subspace = pca.components_
            if numComponents == 1:
                # convert matrix to vector when numComponents = 1
                subspace = subspace.T.reshape(-1)
            energyRetained = np.sum(pca.explained_variance_ratio_)
            singularValues = pca.singular_values_
        except (np.linalg.LinAlgError, ZeroDivisionError) as e:
            # Fails (SVD doesn't converge) for some reason; use the word-vector average in this case.
            ferr.write("[SVD Error]: No subspace constructed for " + " ".join(elements) +
                       " in the document: " + "_".join(debugInfo) + "\n")
            subspace = np.mean(embedding_matrix, axis=0)
            singularValues = np.array([1.0])
            energyRetained = 1.0

    ferr.close()
    flog.close()
    return subspace, singularValues, energyRetained
def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)
    np.fill_diagonal(D, np.inf)

    ########
    # Find the neighbours of each point
    G = np.matrix(np.ones((n, n)) * 0)
    for i in range(n):
        neighbours = np.argsort(D[:, i])
        # want only the nn nearest
        for j in neighbours[1:self.nn + 1]:
            G[i, j] = D[i, j]
            G[j, i] = D[j, i]

    # Weighted shortest path between points (Dijkstra's)
    D = np.zeros((n, n))
    for i in range(n):
        for j in range(i + 1, n):
            D[i, j] = utils.dijkstra(G, i, j)
    ########

    # If two points are disconnected (distance is Inf)
    # then set their distance to the maximum
    # distance in the graph, to encourage them to be far apart.
    D[np.isinf(D)] = D[~np.isinf(D)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z
def compress(self, X):
    n = X.shape[0]

    # Compute Euclidean distances
    D = utils.euclidean_dist_squared(X, X)
    D = np.sqrt(D)

    ########
    # Build the nearest-neighbour graph: keep an edge whenever D[i][j]
    # is among the nn + 1 smallest distances in row i
    G = np.full((n, n), np.inf)
    for i in range(n):
        temp = sorted(D[i])
        for j in range(n):
            if D[i][j] in temp[:(self.nn + 1)]:
                G[i][j] = D[i][j]

    # Geodesic distances via shortest paths on the graph
    for i in range(n):
        for j in range(n):
            D[i][j] = utils.dijkstra(G, i, j)
    ########

    # If two points are disconnected (distance is Inf)
    # then set their distance to the maximum
    # distance in the graph, to encourage them to be far apart.
    D[np.isinf(D)] = D[~np.isinf(D)].max()
    # G[np.isinf(G)] = G[~np.isinf(G)].max()

    # Initialize low-dimensional representation with PCA
    pca = PCA(self.k)
    pca.fit(X)
    Z = pca.compress(X)

    # Solve for the minimizer
    z, f = findMin(self._fun_obj_z, Z.flatten(), 500, D)
    Z = z.reshape(n, self.k)
    return Z
def Bonus2():
    '''
    Visualization of the first 10 eigenvectors.
    '''
    # raw = genfromtxt('digits-raw.csv', delimiter=',')
    raw = genfromtxt('digits-raw-small.csv', delimiter=',')
    X = raw[:, 2:]

    pca = PCA(10)
    eigvec = pca.fit(X)
    eigimg = eigvec.reshape(10, 28, 28)

    for r in range(2):
        for c in range(5):
            i = r * 5 + c
            subplot(2, 5, i + 1)
            imshow(eigimg[i], cmap='gray')
            title(str(i))
    show()
from sklearn import datasets
import matplotlib.pyplot as plt
import numpy as np

from pca import PCA

# data = datasets.load_digits()
data = datasets.load_iris()
X = data.data
y = data.target

# Project the data onto the 2 primary principal components
pca = PCA(2)
pca.fit(X)
X_projected = pca.transform(X)

print('Shape of X:', X.shape)
print('Shape of transformed X:', X_projected.shape)

x1 = X_projected[:, 0]
x2 = X_projected[:, 1]

plt.scatter(x1, x2, c=y, edgecolor='none', alpha=0.8,
            cmap=plt.cm.get_cmap('viridis', 3))
plt.xlabel('Principal Component 1')
plt.ylabel('Principal Component 2')
plt.colorbar()
if __name__ == '__main__':
    struct_log = "./data/HDFS/HDFS_100k.log_structured.csv"

    ## 1. Load the log file and extract feature vectors
    x_train, _ = load_HDFS(struct_log)
    feature_extractor = FeatureExtractor()
    x_train = feature_extractor.fit_transform(x_train, term_weighting='tf-idf',
                                              normalization='zero-mean')

    ## 2. Train an unsupervised model
    print('Train phase:')
    # Initialize PCA, or another unsupervised model such as LogClustering or InvariantsMiner
    model = PCA()
    # Model hyper-parameters may be sensitive to log data; here we use the defaults for the demo
    model.fit(x_train)
    # Make predictions and manually check for correctness. Details may need to go into the raw logs
    y_train = model.predict(x_train)
    print(f"y_train: {y_train}")

    ## 3. Use the trained model for online anomaly detection
    print('Test phase:')
    # Load another new log file. Here we use struct_log for demo only
    x_test, _ = load_HDFS(struct_log)
    # Go through the same feature extraction process as training, using transform() instead
    x_test = feature_extractor.transform(x_test)
    # Finally make predictions and alert on anomalous cases
    y_test = model.predict(x_test)
    print(f"y_test: {y_test}")
    coloredlogs.install(level='DEBUG', logger=logger)
else:
    coloredlogs.install(level='WARNING', logger=logger)

logger.info('Fetching data...')
data = fetch_data(ratio=0.8)

X_train, y_train = data['train']
D, N = X_train.shape

pca = PCA(n_comps=M, standard=standard, logger=logger)
logger.info('Applying PCA with M=%d' % M)

# normalise data
W_train = pca.fit(X_train)
logger.debug('W_train.shape=%s' % (W_train.shape,))

X_test, y_test = data['test']
I, K = X_test.shape
assert I == D, logger.error(
    'Number of features of test and train data do not match, %d != %d' % (D, I))

W_test = pca.transform(X_test)
logger.debug('W_test.shape=%s' % (W_test.shape,))

classes = set(y_train.ravel())
C = len(classes)
combs = list(itertools.combinations(classes, 2))
# -*- coding: utf-8 -*-
"""
Created on Fri Feb 7 23:53:40 2020

@author: ABOLI
"""
import numpy as np

from pca import PCA

X = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])

pca_model = PCA(2)
pca_model.fit(X)
print(pca_model.variance_ratio)
print(pca_model.transform(X))
data = raw_data[[
    'Gender', 'Married', 'Education', 'ApplicantIncome', 'LoanAmount',
    'Credit_History'
]]
x = data.to_numpy()
# x = ss.fit_transform(x)

print("[INFO] Standardizing input vectors ... ")
for i in range(x.shape[0]):
    # standardize each vector
    x[i] = standardize(x[i])

print("[INFO] Implementing principal components analysis ... ")
pca = PCA()
x = pca.fit(x)

y = raw_data['outcome'].to_numpy()
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.3)

model = KernelSVM()
model.fit(x_train, y_train, alpha=0.01, iterations=100)
predictions = model.predict(x_test)
accuracy = accuracy_score(predictions, y_test)
print("[INFO] Home made recipe : " + str(accuracy))

model = SVC(kernel='rbf')
model.fit(x_train, y_train)
y = data.target

# Min-max normalization
X_min_max = (X - X.min(axis=0)) / (X.max(axis=0) - X.min(axis=0))

fig, axes = plt.subplots(1, 2)
axes[0].scatter(X[:, 0], X[:, 1], c=y)
axes[0].set_title("Original Data")
axes[1].scatter(X_min_max[:, 0], X_min_max[:, 1], c=y)
axes[1].set_title("Min-Max Normalized Data")
plt.show()

# Show the data projected onto the two principal components
from pca import PCA

pca = PCA(2)
pca.fit(X_min_max)
X_projected = pca.transform(X_min_max)

print('Min-max normalized X:', X_min_max.shape)  # (150, 4)
print('X after PCA:', X_projected.shape)  # (150, 2)

x1 = X_projected[:, 0]
x2 = X_projected[:, 1]

plt.scatter(x1, x2, c=y, edgecolor='none', alpha=0.8,
            cmap=plt.cm.get_cmap('viridis', 3))
# # bar.finish()
# end_time = time.time()
# print("Accuracy for Simple Nearest Neighbour @rank 1 : ", "{:.4%}".format(rank_one_score / len(query_labels)))
# print("Accuracy for Simple Nearest Neighbour @rank 5 : ", "{:.4%}".format(rank_five_score / len(query_labels)))
# print("Accuracy for Simple Nearest Neighbour @rank 10 : ", "{:.4%}".format(rank_ten_score / len(query_labels)))
#
# print("Computation Time: %s seconds" % (end_time - start_time))

# PCA-MMC
print("-----PCA_MMC-----")
pca = PCA(original_train_features, original_train_labels, M=500, low_dimension=False)
pca.fit()
mmc = MMC_Supervised(max_iter=20, convergence_threshold=1e-5)
mmc_metric = mmc.fit(pca.train_sample_projection, original_train_labels)

transformed_features = mmc_metric.transform(features)
transformed_query_features = transformed_features[query_idxs - 1]

n = 10
start_time = time.time()
rank_one_score = 0
rank_five_score = 0
rank_ten_score = 0

bar.start()
for k in range(len(query_features)):
    bar.update(k + 1)
    feature_vector = transformed_query_features[k]
    gallery_vectors = transformed_features[gallery_data_idx[k] - 1]
import numpy as np

from pca import PCA, PcaType

data = np.array([[2.5, 0.5, 2.2, 1.9, 3.1, 2.3, 2.0, 1.0, 1.5, 1.1],
                 [2.4, 0.7, 2.9, 2.2, 3.0, 2.7, 1.6, 1.1, 1.6, 0.9]])

pca = PCA()
pca.fit(data=data, pcaType=PcaType.Two)

pca.explained_variance_ratio()
pca.get_covariance()
pca.singular_values()
pca.transform(data=data, n_components=2)
import numpy

from pca import PCA

# we'll create a random dataset of 5 variables and 100 samples
random_dataset = numpy.random.rand(100, 5)

# define a pca object and specify a number of components
pca_ = PCA(n_components=2)

# fit the model using the dataset
pca_.fit(dataset=random_dataset)

# transform the dataset
new_dataset = pca_.transform(dataset=random_dataset)

# print the new and old data shapes
print("Original shape:{}, new shape: {}".format(random_dataset.shape, new_dataset.shape))
train_y = np.asarray(train_y)
test_y = np.asarray(test_y)
print(test_y.shape)
print(np.unique(Y))

type = int(input("pca or lda or both ?"))
if type == 1:
    if d == 2:
        split = float(1 / 6)
    else:
        split = 0.3
    s = "pca"

    pca = PCA()
    pca.fit(train_x)
    joblib.dump(pca.eigen_vectors, "pca_prjection_" + str(d) + ".pkl")
    pca_train_x = pca.transform(train_x)
    pca_test_x = pca.transform(test_x)
    # del pca
    print("pca done")

    lr = LogisticRegression(solver='saga', n_jobs=4)
    lr.fit(pca_train_x, train_y)
    print("accuracy on test data : ", lr.score(pca_test_x, test_y))

    pr = lr.predict_proba(pca_test_x)
    pt = lr.predict(pca_test_x)
    tt = [np.argmax(i) for i in pr[:10]]
    print(tt)
def train(settings):
    Xtrain, ytrain, Xval, yval, Xtest, ytest = cross_val(
        path=settings[PATH], k=settings[FOLDS], emotions=settings[EMOTIONS])

    # Each fold will have a new model that is used on the test data one time. The results are stored here
    test_loss, test_acc = [], []
    # Save all the models, this way we can access their loss and accuracy stats
    models = []
    # List of confusion matrices for a later task
    cms = []

    # For every fold, fit a new PCA, train a new model, do validation, save the best model
    for k in range(settings[FOLDS]):
        Xtrain_k, ytrain_k, Xval_k, yval_k, Xtest_k, ytest_k = Xtrain[k], ytrain[k], Xval[k], yval[k], Xtest[k], ytest[k]

        # Shuffle so there is no pattern
        Xtrain_k, ytrain_k = shuffle_data(Xtrain_k, ytrain_k)
        Xval_k, yval_k = shuffle_data(Xval_k, yval_k)
        Xtest_k, ytest_k = shuffle_data(Xtest_k, ytest_k)

        logging.info("Started fold number: {}".format(k))

        # Convert to numpy arrays
        Xtrain_k, Xval_k, Xtest_k = np.array(Xtrain_k), np.array(Xval_k), np.array(Xtest_k)

        # Based on the model, different functions need to be set
        if settings[MODEL] == SoftmaxRegression:
            ytrain_k, yval_k, ytest_k = one_hot_encode(ytrain_k), one_hot_encode(yval_k), one_hot_encode(ytest_k)
            loss_function = softmax_loss_function
            accuracy = softmax_accuracy
        else:
            ytrain_k, yval_k, ytest_k = np.reshape(ytrain_k, (-1, 1)), np.reshape(yval_k, (-1, 1)), np.reshape(ytest_k, (-1, 1))
            loss_function = logistic_loss_function
            accuracy = logistic_accuracy

        # Fit the PCA using only the training data
        pca = PCA(settings[NUM_COMPONENTS])
        pca.fit(Xtrain_k)

        # Project Xtrain, Xval, Xtest onto the principal components
        Xtrain_k, Xval_k, Xtest_k = pca.transform(Xtrain_k), pca.transform(Xval_k), pca.transform(Xtest_k)

        # Make a new model for this fold
        model = settings[MODEL](settings)
        best_weights, min_loss = model.weights, np.inf

        for epoch in range(1, settings[EPOCHS] + 1):
            # Select the method for updating weights
            if settings[BATCH]:
                model.batch_gradient_descent(Xtrain_k, ytrain_k)
            else:
                model.stochastic_gradient_descent(Xtrain_k, ytrain_k)

            # Use the objective function to calculate the loss, and calculate the accuracy
            train_loss = loss_function(model, Xtrain_k, ytrain_k)
            val_loss = loss_function(model, Xval_k, yval_k)
            train_acc = accuracy(model, Xtrain_k, ytrain_k)
            val_acc = accuracy(model, Xval_k, yval_k)

            # Save the results for later graphs
            model.train_loss.append(train_loss)
            model.val_loss.append(val_loss)
            model.train_acc.append(train_acc)
            model.val_acc.append(val_acc)

            # If this is the lowest validation loss so far, save the weights for the best model
            if val_loss < min_loss:
                best_weights = np.copy(model.weights)
                min_loss = val_loss

            # Status update on how the training goes
            if epoch % 10 == 0:
                logging.info(
                    "Epoch: {}, Train_loss: {} , Val_loss: {}, Train_acc: {}, Val_acc: {}"
                    .format(epoch, train_loss, val_loss, train_acc, val_acc))

        # Now update the weights in the model to the best weights
        model.weights = best_weights

        # Use this model on the test data, and save loss & accuracy
        test_loss.append(loss_function(model, Xtest_k, ytest_k))
        test_acc.append(accuracy(model, Xtest_k, ytest_k))

        if settings[MODEL] == SoftmaxRegression:
            cf_matrix = confusion_matrix(model, Xtest_k, ytest_k)
            cms.append(cf_matrix)

        # Model finished, add it to the list of models
        models.append(model)

    # Calculate the average test loss and test accuracy
    avg_test_loss, avg_test_acc = np.mean(test_loss), np.mean(test_acc)
    std_test_acc = np.std(test_acc)
    logging.info("Average Test Loss Over All Folds: {}".format(avg_test_loss))
    logging.info("Average Test Accuracy Over All Folds: {}".format(avg_test_acc))
    logging.info("Std Test Accuracy Over All Folds: {}".format(std_test_acc))

    logging.info("Generating plots")
    train_losses = [model.train_loss for model in models]
    val_losses = [model.val_loss for model in models]
    train_acces = [model.train_acc for model in models]
    val_acces = [model.val_acc for model in models]
    graph_loss(train_losses, val_losses, settings)
    graph_acc(train_acces, val_acces, settings)
    pca.display_pc(settings)

    # Visualize the confusion matrix and weights for each emotion
    if settings[MODEL] == SoftmaxRegression:
        avg_cf_matrix = np.mean(cms, axis=0)  # Take the average of all matrices
        graph_cm(avg_cf_matrix, settings)
        visualize_weights(models, pca, settings)

    return train_losses
        plt.ylabel("$x_{%d}$" % f2)
        for i in range(n):
            plt.annotate(animals[i], (X[i, f1], X[i, f2]))
        utils.savefig('two_random_features.png')

    elif question == '2.2':
        dataset = load_dataset('animals.pkl')
        X = dataset['X'].astype(float)
        animals = dataset['animals']
        n, d = X.shape

        # standardize columns
        X = utils.standardize_cols(X)

        model = PCA(k=2)
        model.fit(X)
        Z = model.compress(X)

        fig, ax = plt.subplots()
        plt.ylabel('z2')
        plt.xlabel('z1')
        ax.scatter(Z[:, 0], Z[:, 1])
        for i in range(n):
            ax.annotate(animals[i], (Z[i, 0], Z[i, 1]))
        utils.savefig('q2_2_PCA_animals.png')

    elif question == '3.1':
        X = load_dataset('highway.pkl')['X'].astype(float) / 255
        n, d = X.shape
        print(n, d)
        h, w = 64, 64  # height and width of each image
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA as sklearn_pca_model

iris_dataset = datasets.load_iris()
orginal_data = iris_dataset.data
target = iris_dataset.target
# print(orginal_data[:20])
# print(target[:20])
# print(orginal_data.shape)
# print(target.shape)

print("PCA FROM SCRATCH:")
pca_from_scratch = PCA(n_components=2)
pca_from_scratch.fit(orginal_data)
transformed_data = pca_from_scratch.transform(orginal_data)
# transformed_data = pca_from_scratch.fit_transform(orginal_data)
pca_from_scratch.plot_cov_matrix()
pca_from_scratch.plot_cumulative_explained_variance_ratio()
print(pca_from_scratch.components)
print(pca_from_scratch.explained_variance())
print(pca_from_scratch.explained_variance_ratio())
print()

print("PCA SCIKIT-LEARN:")
sklearn_pca = sklearn_pca_model(n_components=2)
scaler = StandardScaler()
scaler.fit(orginal_data)