def transform(self, X): """ Apply dimensionality reduction to X. X is projected on the first principal components previously extracted from a training set. Parameters ---------- X : array-like of shape (n_samples, n_features) New data, where `n_samples` is the number of samples and `n_features` is the number of features. Returns ------- X_new : array-like of shape (n_samples, n_components) Projection of X in the first principal components, where `n_samples` is the number of samples and `n_components` is the number of the components. """ _patching_status = PatchingConditionsChain( "sklearn.decomposition.PCA.transform") _dal_ready = _patching_status.and_conditions([ (self.n_components_ > 0, "Number of components <= 0.") ]) _patching_status.write_log() if _dal_ready: return self._transform_daal4py(X, whiten=self.whiten, check_X=True, scale_eigenvalues=False) return PCA_original.transform(self, X)
def transform(self, X):
    if self.n_components_ > 0:
        logging.info("sklearn.decomposition.PCA.transform: "
                     + get_patch_message("daal"))
        return self._transform_daal4py(X, whiten=self.whiten,
                                       check_X=True, scale_eigenvalues=False)
    else:
        logging.info("sklearn.decomposition.PCA.transform: "
                     + get_patch_message("sklearn"))
        return PCA_original.transform(self, X)
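# A minimal usage sketch (an assumption for illustration, not part of the patched
# module above): with the patched PCA importable as sklearn.decomposition.PCA,
# transform() is called exactly as in stock scikit-learn, and the daal4py path is
# taken whenever n_components_ > 0 after fitting.
import numpy as np
from sklearn.decomposition import PCA

X = np.random.RandomState(0).rand(200, 10)
pca = PCA(n_components=3).fit(X)
X_new = pca.transform(X)   # dispatches to _transform_daal4py when the patch applies
print(X_new.shape)         # (200, 3)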
def _fit(self, X):
    if issparse(X):
        raise TypeError('PCA does not support sparse input. See '
                        'TruncatedSVD for a possible alternative.')
    if sklearn_check_version('0.23'):
        X = self._validate_data(X, dtype=[np.float64, np.float32],
                                ensure_2d=True, copy=self.copy)
    else:
        X = check_array(X, dtype=[np.float64, np.float32],
                        ensure_2d=True, copy=self.copy)

    if self.n_components is None:
        if self.svd_solver != 'arpack':
            n_components = min(X.shape)
        else:
            n_components = min(X.shape) - 1
    else:
        n_components = self.n_components

    self._fit_svd_solver = self.svd_solver
    shape_good_for_daal = X.shape[1] / X.shape[0] < 2

    if self._fit_svd_solver == 'auto':
        if max(X.shape) <= 500 or n_components == 'mle':
            self._fit_svd_solver = 'full'
        elif n_components >= 1 and n_components < (
                .1 if shape_good_for_daal else .8) * min(X.shape):
            self._fit_svd_solver = 'randomized'
        else:
            self._fit_svd_solver = 'full'

    if self._fit_svd_solver == 'full':
        if shape_good_for_daal:
            logging.info("sklearn.decomposition.PCA.fit: "
                         + get_patch_message("daal"))
            result = self._fit_full(X, n_components)
        else:
            logging.info("sklearn.decomposition.PCA.fit: "
                         + get_patch_message("sklearn"))
            result = PCA_original._fit_full(self, X, n_components)
    elif self._fit_svd_solver in ['arpack', 'randomized']:
        logging.info("sklearn.decomposition.PCA.fit: "
                     + get_patch_message("sklearn"))
        result = self._fit_truncated(X, n_components, self._fit_svd_solver)
    else:
        raise ValueError("Unrecognized svd_solver='{0}'"
                         "".format(self._fit_svd_solver))

    return result
def get_train_datas(pos_im_paths, neg_im_paths):
    train_datas = []   # training data
    train_lables = []  # training labels
    # Extract HoG features and store them in the training data list
    for pos_im_path in pos_im_paths:
        train_datas.append(get_HoG_ft(get_images(pos_im_path)))
        train_lables.append(1)
    for neg_im_path in neg_im_paths:
        train_datas.append(get_HoG_ft(get_images(neg_im_path)))
        train_lables.append(0)
    train_datas = np.array(train_datas)    # convert the list of feature vectors to an n-d matrix
    train_lables = np.array(train_lables)
    print("train_datas:%d" % train_datas.shape[0])  # print the number of training images
    # print(train_datas.shape)
    # np.savetxt('out.txt', train_datas)
    train_datas = PCA(n_components=2).fit_transform(train_datas)
    return train_datas, train_lables
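# Hypothetical usage of get_train_datas (the directory names and the downstream
# classifier are illustrative assumptions, not taken from the source): collect
# positive/negative image paths, build the 2-D PCA features, and fit a linear SVM.
from imutils import paths
from sklearn.svm import LinearSVC

pos_im_paths = list(paths.list_files('images/CarData/TrainImages/pos', validExts='.pgm'))
neg_im_paths = list(paths.list_files('images/CarData/TrainImages/neg', validExts='.pgm'))
train_datas, train_lables = get_train_datas(pos_im_paths, neg_im_paths)
clf = LinearSVC(C=1, loss="hinge").fit(train_datas, train_lables)
print(clf.score(train_datas, train_lables))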
def transform_X(self, X, W):
    if X.shape[1] > 3 or X.shape[1] < 2:
        from sklearn.manifold import LocallyLinearEmbedding, TSNE
        from sklearn.preprocessing import StandardScaler
        from tf_labelprop.gssl.graph.gssl_utils import extract_lap_eigvec, lap_matrix
        """
        if not W is None:
            X_transf = extract_lap_eigvec(lap_matrix(W, which_lap='sym'), 2)[0]
        else:
            X_transf = LocallyLinearEmbedding(n_neighbors=20, n_components=2,
                                              random_state=1018,
                                              method="modified").fit_transform(X)
        scaler = StandardScaler()
        scaler.fit(X_transf)
        X_transf = scaler.transform(X_transf)
        """
        X_transf = PCA(n_components=2).fit_transform(X)
    else:
        X_transf = np.array(X)
    return X_transf
def _fit(self, X):
    if issparse(X):
        raise TypeError('PCA does not support sparse input. See '
                        'TruncatedSVD for a possible alternative.')
    if sklearn_check_version('0.23'):
        X = self._validate_data(X, dtype=[np.float64, np.float32],
                                ensure_2d=True, copy=False)
    else:
        X = check_array(X, dtype=[np.float64, np.float32],
                        ensure_2d=True, copy=False)

    if self.n_components is None:
        if self.svd_solver != 'arpack':
            n_components = min(X.shape)
        else:
            n_components = min(X.shape) - 1
    else:
        n_components = self.n_components

    self._fit_svd_solver = self.svd_solver
    shape_good_for_daal = X.shape[1] / X.shape[0] < 2

    if self._fit_svd_solver == 'auto':
        if n_components == 'mle':
            self._fit_svd_solver = 'full'
        else:
            n, p, k = X.shape[0], X.shape[1], n_components
            # These coefficients are the result of training a Logistic Regression
            # (max_iter=10000, solver="liblinear", fit_intercept=False)
            # on different datasets and numbers of components. X is a dataset with
            # npk, np^2, and n^2 columns, and y is the speedup of patched
            # scikit-learn's full PCA against stock scikit-learn's randomized PCA.
            regression_coefs = np.array([
                [9.779873e-11, n * p * k],
                [-1.122062e-11, n * p * p],
                [1.127905e-09, n ** 2],
            ])

            if n_components >= 1 \
                    and np.dot(regression_coefs[:, 0], regression_coefs[:, 1]) <= 0:
                self._fit_svd_solver = 'randomized'
            else:
                self._fit_svd_solver = 'full'

    if not shape_good_for_daal or self._fit_svd_solver != 'full':
        if sklearn_check_version('0.23'):
            X = self._validate_data(X, copy=self.copy)
        else:
            X = check_array(X, copy=self.copy)

    _patching_status = PatchingConditionsChain(
        "sklearn.decomposition.PCA.fit")
    _dal_ready = _patching_status.and_conditions([
        (self._fit_svd_solver == 'full',
         f"'{self._fit_svd_solver}' SVD solver is not supported. "
         "Only 'full' solver is supported.")
    ])
    if _dal_ready:
        _dal_ready = _patching_status.and_conditions([
            (shape_good_for_daal,
             "The shape of X does not satisfy oneDAL requirements: "
             "number of features / number of samples >= 2")
        ])
        if _dal_ready:
            result = self._fit_full(X, n_components)
        else:
            result = PCA_original._fit_full(self, X, n_components)
    elif self._fit_svd_solver in ['arpack', 'randomized']:
        result = self._fit_truncated(X, n_components, self._fit_svd_solver)
    else:
        raise ValueError("Unrecognized svd_solver='{0}'"
                         "".format(self._fit_svd_solver))
    _patching_status.write_log()
    return result
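# A small, self-contained illustration (the shapes n, p, k are hypothetical) of the
# solver-selection heuristic used above: the dot product estimates the speedup of
# the patched 'full' solver over stock 'randomized'; a non-positive score falls
# back to 'randomized', otherwise the 'full' solver is kept.
import numpy as np

n, p, k = 100_000, 50, 10
regression_coefs = np.array([
    [9.779873e-11, n * p * k],
    [-1.122062e-11, n * p * p],
    [1.127905e-09, n ** 2],
])
score = np.dot(regression_coefs[:, 0], regression_coefs[:, 1])
solver = 'randomized' if k >= 1 and score <= 0 else 'full'
print(score, solver)   # positive score here, so the patched 'full' solver is chosen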
    data.append(fd)
    labels.append(1)

for file in neg_im_listing:
    img = cv2.imread(neg_im_path + '/' + file, 0)
    img = cv2.resize(img, (64, 128))
    # Now we calculate the HOG for negative features
    fd = ft.hog(img, 9, (8, 8), (2, 2), block_norm='L2', feature_vector=True)
    print(fd)
    data.append(fd)
    labels.append(0)

# encode the labels, converting them from strings to integers
data = np.array(data)
labels = np.array(labels)
data = PCA(n_components=2).fit_transform(data)
# split into training data and test data
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.5)
# build a linear SVM object and train it
clf = LinearSVC(C=1, loss="hinge").fit(x_train, y_train)
# prediction accuracy on the training data
print(clf.score(x_train, y_train))
# prediction accuracy on the test data
print('Accuracy Rate:', clf.score(x_test, y_test))
# scatter plot of the training data
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train)
# scatter plot of the test data
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test, edgecolors='b')
from imutils import paths
from PIL import Image
import numpy as np
import CarDetect as cd
from sklearn.decomposition._pca import PCA

test_im_path = 'images/CarData/testImages'
test_paths = list(paths.list_files(test_im_path, validExts='.pgm'))
test_image = np.array(Image.open(test_paths[3]))
test_datas = []
test_labels = []
imgs = []
# Slide rectangular windows over the image: window size starts at 40*20,
# grows in steps of 15, up to a maximum of 100*50
for h in range(20, 50, 15):
    # traverse the image with the fixed-size window
    for i in range(0, test_image.shape[0] - h, h):
        for j in range(0, test_image.shape[1] - h * 2, h * 2):
            img = test_image[i:i + h, j:j + h * 2]
            imgs.append(img)
for img in imgs:
    test_datas.append(cd.get_HoG_ft(img))  # compute HoG features for each cropped window
test_datas = np.array(test_datas)
test_datas = PCA(n_components=2).fit_transform(test_datas)
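# A minimal follow-up sketch (the matplotlib plot is an assumption, not part of the
# source): visualize the 2-D PCA projection of the window features computed above.
import matplotlib.pyplot as plt

plt.scatter(test_datas[:, 0], test_datas[:, 1], s=10)
plt.title('PCA projection of HoG features from sliding windows')
plt.show()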
print("Eigen Vectors = {} \n".format(vectors)) print("Eigen values = {} \n".format(values)) P = vectors.T.dot(C.T) print("Matrix after applying PCA = {} \n".format(P.T)) #Scikit learn Verifcation of the above result print('*' * 15 + 'Verification of the above result using sklearn' + '*' * 15) from numpy import array from sklearn.decomposition import PCA # define a matrix A = array([[1, 2], [3, 4], [5, 6]]) print(A) # create the PCA instance pca = PCA(2) # fit on data pca.fit(A) # access values and vectors print(pca.components_) print(pca.explained_variance_) # transform data B = pca.transform(A) print(B) """ ***************Plain numpy implementation of PCA*************** Initial Martix = [[1 2] [3 4] [5 6]] Mean of the matrix = [3. 4.] Column scaling applied over the matrix = [[-2. -2.]