Example #1
    def transform(self, X):
        """
        Apply dimensionality reduction to X.

        X is projected on the first principal components previously extracted
        from a training set.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            New data, where `n_samples` is the number of samples
            and `n_features` is the number of features.

        Returns
        -------
        X_new : array-like of shape (n_samples, n_components)
            Projection of X in the first principal components, where `n_samples`
            is the number of samples and `n_components` is the number of the components.
        """
        _patching_status = PatchingConditionsChain(
            "sklearn.decomposition.PCA.transform")
        _dal_ready = _patching_status.and_conditions([
            (self.n_components_ > 0, "Number of components <= 0.")
        ])

        _patching_status.write_log()
        if _dal_ready:
            return self._transform_daal4py(X,
                                           whiten=self.whiten,
                                           check_X=True,
                                           scale_eigenvalues=False)
        return PCA_original.transform(self, X)
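The docstring above documents the stock scikit-learn PCA API, which the patched class keeps intact. A minimal usage sketch against stock scikit-learn, with illustrative shapes:

import numpy as np
from sklearn.decomposition import PCA

rng = np.random.RandomState(0)
X_train = rng.rand(100, 10)  # 100 samples, 10 features
X_new = rng.rand(5, 10)      # new data with the same number of features

pca = PCA(n_components=3).fit(X_train)  # extract the principal components
X_proj = pca.transform(X_new)           # project new data onto them
print(X_proj.shape)                     # (5, 3): (n_samples, n_components)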
Example #2
    def transform(self, X):
        if self.n_components_ > 0:
            logging.info("sklearn.decomposition.PCA.transform: " +
                         get_patch_message("daal"))
            return self._transform_daal4py(X,
                                           whiten=self.whiten,
                                           check_X=True,
                                           scale_eigenvalues=False)
        else:
            logging.info("sklearn.decomposition.PCA.transform: " +
                         get_patch_message("sklearn"))
            return PCA_original.transform(self, X)
Example #3
    def _fit(self, X):
        if issparse(X):
            raise TypeError('PCA does not support sparse input. See '
                            'TruncatedSVD for a possible alternative.')

        if sklearn_check_version('0.23'):
            X = self._validate_data(X,
                                    dtype=[np.float64, np.float32],
                                    ensure_2d=True,
                                    copy=self.copy)
        else:
            X = check_array(X,
                            dtype=[np.float64, np.float32],
                            ensure_2d=True,
                            copy=self.copy)

        if self.n_components is None:
            if self.svd_solver != 'arpack':
                n_components = min(X.shape)
            else:
                n_components = min(X.shape) - 1
        else:
            n_components = self.n_components

        self._fit_svd_solver = self.svd_solver
        shape_good_for_daal = X.shape[1] / X.shape[0] < 2
        if self._fit_svd_solver == 'auto':
            if max(X.shape) <= 500 or n_components == 'mle':
                self._fit_svd_solver = 'full'
            elif n_components >= 1 and n_components < (
                    .1 if shape_good_for_daal else .8) * min(X.shape):
                self._fit_svd_solver = 'randomized'
            else:
                self._fit_svd_solver = 'full'

        if self._fit_svd_solver == 'full':
            if shape_good_for_daal:
                logging.info("sklearn.decomposition.PCA.fit: " +
                             get_patch_message("daal"))
                result = self._fit_full(X, n_components)
            else:
                logging.info("sklearn.decomposition.PCA.fit: " +
                             get_patch_message("sklearn"))
                result = PCA_original._fit_full(self, X, n_components)
        elif self._fit_svd_solver in ['arpack', 'randomized']:
            logging.info("sklearn.decomposition.PCA.fit: " +
                         get_patch_message("sklearn"))
            result = self._fit_truncated(X, n_components, self._fit_svd_solver)
        else:
            raise ValueError("Unrecognized svd_solver='{0}'"
                             "".format(self._fit_svd_solver))

        return result
Example #4
def get_train_datas(pos_im_paths, neg_im_paths):
    train_datas = []  # holds the training feature vectors
    train_labels = []  # holds the training labels
    # Extract HoG features and store them in the training arrays
    for pos_im_path in pos_im_paths:
        train_datas.append(get_HoG_ft(get_images(pos_im_path)))
        train_labels.append(1)
    for neg_im_path in neg_im_paths:
        train_datas.append(get_HoG_ft(get_images(neg_im_path)))
        train_labels.append(0)
    train_datas = np.array(train_datas)  # convert the list of feature vectors to an ndarray
    train_labels = np.array(train_labels)
    print("train_datas:%d" % train_datas.shape[0])  # print the number of training images
    # print(train_datas.shape)
    # np.savetxt('out.txt', train_datas)
    train_datas = PCA(n_components=2).fit_transform(train_datas)
    return train_datas, train_labels
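A hypothetical call of the helper above; the image paths below are made up, and the get_images/get_HoG_ft helpers are assumed to come from the surrounding project:

# Hypothetical image paths; label 1 = positive class, 0 = negative
pos_paths = ['images/pos/car1.pgm', 'images/pos/car2.pgm']
neg_paths = ['images/neg/bg1.pgm', 'images/neg/bg2.pgm']
train_datas, train_labels = get_train_datas(pos_paths, neg_paths)
print(train_datas.shape)  # (4, 2) after the PCA reduction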
Example #5
    def transform_X(self, X, W):
        if X.shape[1] > 3 or X.shape[1] < 2:
            from sklearn.manifold import LocallyLinearEmbedding, TSNE
            from sklearn.preprocessing import StandardScaler
            from tf_labelprop.gssl.graph.gssl_utils import extract_lap_eigvec, lap_matrix
            """
                if not W is None:
                    X_transf = extract_lap_eigvec(lap_matrix(W,which_lap='sym'), 2)[0]
                else:
                    X_transf = LocallyLinearEmbedding(n_neighbors=20,n_components=2,random_state=1018,method="modified").fit_transform(X)
                
                scaler = StandardScaler()
                scaler.fit(X_transf)
                X_transf = scaler.transform(X_transf)
                """
            X_transf = PCA(n_components=2).fit_transform(X)

        else:
            X_transf = np.array(X)
        return X_transf
Example #6
    def _fit(self, X):
        if issparse(X):
            raise TypeError('PCA does not support sparse input. See '
                            'TruncatedSVD for a possible alternative.')

        if sklearn_check_version('0.23'):
            X = self._validate_data(X,
                                    dtype=[np.float64, np.float32],
                                    ensure_2d=True,
                                    copy=False)
        else:
            X = check_array(X,
                            dtype=[np.float64, np.float32],
                            ensure_2d=True,
                            copy=False)

        if self.n_components is None:
            if self.svd_solver != 'arpack':
                n_components = min(X.shape)
            else:
                n_components = min(X.shape) - 1
        else:
            n_components = self.n_components

        self._fit_svd_solver = self.svd_solver
        shape_good_for_daal = X.shape[1] / X.shape[0] < 2

        if self._fit_svd_solver == 'auto':
            if n_components == 'mle':
                self._fit_svd_solver = 'full'
            else:
                n, p, k = X.shape[0], X.shape[1], n_components
                # These coefficients are the result of training a logistic
                # regression (max_iter=10000, solver="liblinear",
                # fit_intercept=False) on a range of datasets and component
                # counts. The features are n*p*k, n*p^2, and n^2, and the
                # target is the speedup of patched scikit-learn's full PCA
                # over stock scikit-learn's randomized PCA.
                regression_coefs = np.array([
                    [9.779873e-11, n * p * k],
                    [-1.122062e-11, n * p * p],
                    [1.127905e-09, n**2],
                ])

                if n_components >= 1 \
                        and np.dot(regression_coefs[:, 0], regression_coefs[:, 1]) <= 0:
                    self._fit_svd_solver = 'randomized'
                else:
                    self._fit_svd_solver = 'full'

        if not shape_good_for_daal or self._fit_svd_solver != 'full':
            if sklearn_check_version('0.23'):
                X = self._validate_data(X, copy=self.copy)
            else:
                X = check_array(X, copy=self.copy)

        _patching_status = PatchingConditionsChain(
            "sklearn.decomposition.PCA.fit")
        _dal_ready = _patching_status.and_conditions([
            (self._fit_svd_solver == 'full',
             f"'{self._fit_svd_solver}' SVD solver is not supported. "
             "Only 'full' solver is supported.")
        ])

        if _dal_ready:
            _dal_ready = _patching_status.and_conditions([
                (shape_good_for_daal,
                 "The shape of X does not satisfy oneDAL requirements: "
                 "number of features / number of samples >= 2")
            ])
            if _dal_ready:
                result = self._fit_full(X, n_components)
            else:
                result = PCA_original._fit_full(self, X, n_components)
        elif self._fit_svd_solver in ['arpack', 'randomized']:
            result = self._fit_truncated(X, n_components, self._fit_svd_solver)
        else:
            raise ValueError("Unrecognized svd_solver='{0}'"
                             "".format(self._fit_svd_solver))

        _patching_status.write_log()
        return result
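The 'auto' branch above picks between the 'randomized' and 'full' solvers by evaluating a trained linear decision function of n, p, and k. A standalone sketch of that check, with illustrative sizes and the coefficients copied from the code above:

import numpy as np

n, p, k = 100_000, 50, 10  # illustrative sample, feature, and component counts
regression_coefs = np.array([
    [9.779873e-11, n * p * k],
    [-1.122062e-11, n * p * p],
    [1.127905e-09, n**2],
])
# Dot the learned coefficients with the size features; a non-positive value
# predicts that randomized PCA will be faster than patched full PCA
decision = np.dot(regression_coefs[:, 0], regression_coefs[:, 1])
print('randomized' if k >= 1 and decision <= 0 else 'full')  # prints 'full' here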
Example #7
    data.append(fd)
    labels.append(1)

for file in neg_im_listing:
    img = cv2.imread(neg_im_path + '/' + file, 0)
    img = cv2.resize(img, (64, 128))
    # Now we calculate the HOG for negative features
    fd = ft.hog(img, 9, (8, 8), (2, 2), block_norm='L2', feature_vector=True)
    print(fd)
    data.append(fd)
    labels.append(0)
# Convert the data and labels to numpy arrays
data = np.array(data)
labels = np.array(labels)
data = PCA(n_components=2).fit_transform(data)

# Split into training and test data
x_train, x_test, y_train, y_test = train_test_split(data,
                                                    labels,
                                                    test_size=0.5)
# Build a linear SVM and train it
clf = LinearSVC(C=1, loss="hinge").fit(x_train, y_train)
# Prediction accuracy on the training data
print(clf.score(x_train, y_train))
# Prediction accuracy on the test data
print('Accuracy Rate:', clf.score(x_test, y_test))
# Scatter plot of the training data
plt.scatter(x_train[:, 0], x_train[:, 1], c=y_train)
# Scatter plot of the test data
plt.scatter(x_test[:, 0], x_test[:, 1], c=y_test, edgecolors='b')
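Run as a plain script, the scatter plots above need an explicit call to render (assuming matplotlib.pyplot was imported as plt in the truncated top of the snippet):

plt.show()  # display the training and test scatter plots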
Example #8
from imutils import paths
from PIL import Image
import numpy as np
import CarDetect as cd
from sklearn.decomposition._pca import PCA

test_im_path = 'images/CarData/testImages'
test_paths = list(paths.list_files(test_im_path, validExts='.pgm'))
test_image = np.array(Image.open(test_paths[3]))
test_datas = []
test_labels = []
imgs = []
# Iterate over window sizes: the box starts at 40x20 and its height grows
# in steps of 15
for h in range(20, 50, 15):
    # Slide the fixed-size (2h x h) window across the image
    for i in range(0, test_image.shape[0] - h, h):
        for j in range(0, test_image.shape[1] - h * 2, h * 2):
            img = test_image[i:i + h, j:j + h * 2]
            imgs.append(img)
# Compute HoG features for every cropped window; converting to an ndarray and
# applying PCA once, after the loop, keeps the list appendable while windows
# are still being collected
for img in imgs:
    test_datas.append(cd.get_HoG_ft(img))
test_datas = np.array(test_datas)
test_datas = PCA(n_components=2).fit_transform(test_datas)
Example #9
print("Eigen Vectors = {} \n".format(vectors))
print("Eigen values = {} \n".format(values))

P = vectors.T.dot(C.T)
print("Matrix after applying PCA = {} \n".format(P.T))

# Scikit-learn verification of the above result
print('*' * 15 + 'Verification of the above result using sklearn' + '*' * 15)

from numpy import array
from sklearn.decomposition import PCA
# define a matrix
A = array([[1, 2], [3, 4], [5, 6]])
print(A)
# create the PCA instance
pca = PCA(2)
# fit on data
pca.fit(A)
# access values and vectors
print(pca.components_)
print(pca.explained_variance_)
# transform data
B = pca.transform(A)
print(B)
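Assuming P from the manual eigendecomposition above is still in scope, the sklearn projection should match it up to the arbitrary sign of each principal component; a quick sanity check:

import numpy as np
# Eigenvector signs are arbitrary, so compare absolute values; this assumes
# the manual eigenvectors were sorted by decreasing eigenvalue, as sklearn's are
print(np.allclose(np.abs(B), np.abs(P.T)))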
"""
***************Plain numpy implementation of PCA***************
Initial Matrix = [[1 2]
 [3 4]
 [5 6]]
Mean of the matrix = [3. 4.]
Column scaling applied over the matrix = [[-2. -2.]