Example #1
import os

import numpy as np
import theano
from sklearn.decomposition import IncrementalPCA
from sklearn.utils import gen_batches


# BASE_DIR is assumed to be a project-level constant defined elsewhere.
def compute_pca(data_path=os.path.join(BASE_DIR, 'data/memmap/'),
                out_path=os.path.join(BASE_DIR, 'data/'),
                batch_size=500, image_size=3*300*300):

    ipca = IncrementalPCA(n_components=3, batch_size=batch_size)

    path = os.path.join(data_path, 'tn_x.dat')
    # Read-only access is sufficient; the memmap is never written to.
    train = np.memmap(path, dtype=theano.config.floatX, mode='r', shape=(4044, image_size))
    n_samples, _ = train.shape

    # Fit the PCA incrementally on batches of training pixels.
    for batch in gen_batches(n_samples, batch_size):
        X = train[batch, :]
        # (n, 3*H*W) -> (n, 3, H*W) -> (n, H*W, 3) -> (n*H*W, 3): one RGB row per pixel.
        X = np.reshape(X, (X.shape[0], 3, image_size // 3))
        X = X.transpose(0, 2, 1)
        X = np.reshape(X, (-1, 3))
        ipca.partial_fit(X)

    path = os.path.join(data_path, 'v_x.dat')
    valid = np.memmap(path, dtype=theano.config.floatX, mode='r', shape=(500, image_size))
    n_samples, _ = valid.shape


    # Note: the validation pixels are folded into the same fit.
    for batch in gen_batches(n_samples, batch_size):
        X = valid[batch, :]
        X = np.reshape(X, (X.shape[0], 3, image_size // 3))
        X = X.transpose(0, 2, 1)
        X = np.reshape(X, (-1, 3))
        ipca.partial_fit(X)

    # The covariance matrix is symmetric, so eigh is the appropriate
    # solver and guarantees real-valued eigenvalues.
    eigenvalues, eigenvectors = np.linalg.eigh(ipca.get_covariance())
    eigenvalues.astype('float32').dump(os.path.join(out_path, 'eigenvalues.dat'))
    eigenvectors.astype('float32').dump(os.path.join(out_path, 'eigenvectors.dat'))
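
These per-channel eigenvalues and eigenvectors are the usual ingredients of AlexNet-style PCA color augmentation. A minimal sketch of that use, assuming the two files written by compute_pca above (ndarray.dump writes a pickle, hence allow_pickle=True; pca_color_jitter is a hypothetical helper name):

import os

import numpy as np

def pca_color_jitter(image, out_path, alpha_std=0.1, rng=np.random):
    # image: float array of shape (H, W, 3), same channel order as the
    # training data used in compute_pca.
    eigenvalues = np.load(os.path.join(out_path, 'eigenvalues.dat'), allow_pickle=True)
    eigenvectors = np.load(os.path.join(out_path, 'eigenvectors.dat'), allow_pickle=True)
    # Shift every pixel along the principal axes of color space, scaled
    # by random magnitudes and the corresponding eigenvalues.
    alphas = rng.normal(0.0, alpha_std, size=3)
    return image + eigenvectors @ (alphas * eigenvalues)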
Example #2
import numpy as np
import pytest
from sklearn import datasets
from sklearn.decomposition import PCA, IncrementalPCA

iris = datasets.load_iris()  # the tests below assume iris at module scope


def test_incremental_pca_sparse(matrix_class):
    # Incremental PCA on sparse arrays; matrix_class is expected to be
    # parametrized with scipy.sparse matrix types (e.g. csr_matrix).
    X = iris.data
    pca = PCA(n_components=2)
    pca.fit_transform(X)
    X_sparse = matrix_class(X)
    batch_size = X_sparse.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)

    X_transformed = ipca.fit_transform(X_sparse)

    assert X_transformed.shape == (X_sparse.shape[0], 2)
    np.testing.assert_allclose(ipca.explained_variance_ratio_.sum(),
                               pca.explained_variance_ratio_.sum(),
                               rtol=1e-3)

    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X_sparse)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        np.testing.assert_allclose(np.dot(cov, precision),
                                   np.eye(X_sparse.shape[1]),
                                   atol=1e-13)

    with pytest.raises(TypeError,
                       match="IncrementalPCA.partial_fit does not support "
                       "sparse input. Either convert data to dense "
                       "or use IncrementalPCA.fit to do so in batches."):
        ipca.partial_fit(X_sparse)
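
The error message itself points at the workaround: let fit densify batch by batch internally, or densify the batches yourself before calling partial_fit. A minimal sketch of the manual route (partial_fit_sparse is a hypothetical helper):

from sklearn.utils import gen_batches

def partial_fit_sparse(ipca, X_sparse, batch_size):
    # Densify one slice at a time, so only batch_size rows
    # are ever dense in memory.
    for batch in gen_batches(X_sparse.shape[0], batch_size):
        ipca.partial_fit(X_sparse[batch].toarray())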
Example #3
import numpy as np
from scipy import linalg
from sklearn.decomposition import IncrementalPCA

import helper  # project-local progress utility (assumed available)


def get_ipca(paths, batch_size):
    """
    Computes IPCA for BGR values of given images
    :param paths:
    :param batch_size:
    :return: eigen_vectors, eigen_values, covariance matrix, order is BGR
    """
    ipca = IncrementalPCA(n_components=3)
    pending = []  # pixel rows accumulated for the current batch
    count = len(paths)
    for i, path in enumerate(helper.show_progress(paths, 100)):

        # Load image and flatten to one row of BGR values per pixel
        pixels = load_image_pixels(path)
        pending.append(pixels)

        # Fit once a full batch of images has accumulated, or at the end
        if (i + 1) % batch_size == 0 or i == count - 1:
            ipca.partial_fit(np.concatenate(pending, axis=0))
            pending = []

    # The covariance matrix is symmetric, so eigh is the appropriate
    # solver and guarantees real-valued eigenvalues.
    cov = ipca.get_covariance()
    eigen_vals, eigen_vecs = linalg.eigh(cov)

    return eigen_vecs, eigen_vals, cov
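
load_image_pixels is not defined in this snippet; a minimal sketch under the assumption that images are read with OpenCV, whose imread returns channels in BGR order (matching the docstring):

import cv2
import numpy as np

def load_image_pixels(path):
    # cv2.imread yields an (H, W, 3) uint8 array in BGR order;
    # flatten to one row per pixel for IncrementalPCA.
    img = cv2.imread(path)
    return img.reshape(-1, 3).astype(np.float64)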
Example #4
import numpy as np
from sklearn import datasets
from sklearn.decomposition import PCA, IncrementalPCA

iris = datasets.load_iris()


def test_incremental_pca():
    # Incremental PCA on dense arrays.
    X = iris.data
    batch_size = X.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    pca = PCA(n_components=2)
    pca.fit_transform(X)

    X_transformed = ipca.fit_transform(X)

    assert X_transformed.shape == (X.shape[0], 2)
    np.testing.assert_allclose(
        ipca.explained_variance_ratio_.sum(),
        pca.explained_variance_ratio_.sum(),
        rtol=1e-3,
    )

    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        np.testing.assert_allclose(
            np.dot(cov, precision), np.eye(X.shape[1]), atol=1e-13
        )
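
get_covariance and get_precision are consistent by construction: the precision is the inverse of the covariance implied by the fitted probabilistic PCA model, which is why their product recovers the identity. A quick standalone check:

import numpy as np
from sklearn import datasets
from sklearn.decomposition import IncrementalPCA

X = datasets.load_iris().data
ipca = IncrementalPCA(n_components=2, batch_size=50).fit(X)
product = ipca.get_covariance() @ ipca.get_precision()
print(np.allclose(product, np.eye(X.shape[1])))  # True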
Example #5
from sklearn.decomposition import IncrementalPCA


class IPCA(object):
    def __init__(self,
                 n_components=None,
                 whiten=False,
                 copy=True,
                 batch_size=None):
        """
        :param n_components:   default为None ,int 或None, 想要保留的分量数,None 时,
        min(n_samples, n_features)
        :param whiten:   bool型,可选项, 默认为False, 当true(默认情况下为false)时,components_ 向量除以
        n_samples*components_以确保具有单位组件级方差的不相关输出。
        :param copy: 默认为True,  False时,x 将被覆盖,将节约能存,但存在不安全
        :param batch_size: default None, 批量样本数,   只在fit 中使用,设为None,系统自动设成5*n_features,
        以保持经度与内存开销的平衡
        """
        self.model = IncrementalPCA(n_components=n_components,
                                    whiten=whiten,
                                    copy=copy,
                                    batch_size=batch_size)

    def fit(self, x, y=None):
        self.model.fit(X=x, y=y)
        return self

    def transform(self, x):
        return self.model.transform(X=x)

    def fit_transform(self, x, y=None):
        return self.model.fit_transform(X=x, y=y)

    def get_params(self, deep=True):  # get the estimator's parameters
        return self.model.get_params(deep=deep)

    def set_params(self, **params):  # set the estimator's parameters
        self.model.set_params(**params)
        return self

    def inverse_transform(self, x):  # the exact inverse of fit_transform
        return self.model.inverse_transform(X=x)

    def get_precision(self):  # precision matrix from the generative model
        return self.model.get_precision()

    def get_covariance(self):  # covariance from the generative model
        return self.model.get_covariance()

    def partial_fit(self, x, y=None, check_input=True):  # incremental training
        self.model.partial_fit(X=x, y=y, check_input=check_input)
        return self

    def get_attributes(self):
        component = self.model.components_
        explained_variance = self.model.explained_variance_
        explained_variance_ratio = self.model.explained_variance_ratio_
        singular_values = self.model.singular_values_
        means = self.model.mean_  # per-feature mean
        var = self.model.var_  # per-feature variance
        noise_variance = self.model.noise_variance_  # estimated noise covariance
        n_component = self.model.n_components_
        n_samples_seen = self.model.n_samples_seen_
        return (component, explained_variance, explained_variance_ratio,
                singular_values, means, var, noise_variance,
                n_component, n_samples_seen)
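
A minimal usage sketch of the wrapper above, on synthetic data (shapes are illustrative):

import numpy as np

rng = np.random.RandomState(0)
X = rng.rand(100, 10)

model = IPCA(n_components=3, batch_size=25)
Z = model.fit_transform(X)              # projected data, shape (100, 3)
X_approx = model.inverse_transform(Z)   # reconstruction from 3 components
print(model.get_attributes()[2].sum())  # total explained variance ratio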
Example #6
    def compute_pca_of_image_set(self):
        print("COMPUTING PCA OF IMAGE SET")
        recordings = os.listdir(self.data_path)

        c = 0
        # Channel means, precomputed once over the full image set with
        # the loop below:
        # mean_r = 0
        # mean_g = 0
        # mean_b = 0
        # num_i = 0
        # for r in recordings:
        #     c += 1
        #     images = os.listdir(self.data_path + "/" + r + "/")
        #     for i in images:
        #         if i.endswith(".png"):
        #             data = self.read_image(self.data_path + "/" + r + "/" + i)
        #             mean_r += np.mean(data[:, :, 0])
        #             mean_g += np.mean(data[:, :, 1])
        #             mean_b += np.mean(data[:, :, 2])
        #             num_i += 1
        # mean_r = mean_r/num_i
        # mean_g = mean_g / num_i
        # mean_b = mean_b / num_i
        # print(mean_r)
        # print(mean_g)
        # print(mean_b)
        mean_r = 134.09352525641472
        mean_g = 131.9404211385675
        mean_b = 129.67342747136797
        m = [mean_r, mean_g, mean_b]

        transformer = IncrementalPCA(n_components=3)
        for r in recordings:
            c += 1
            images = os.listdir(self.data_path + "/" + r + "/")
            rows = []
            for i in images:
                if i.endswith(".png"):
                    data = self.read_image(self.data_path + "/" + r + "/" + i)
                    # flatten (H, W, 3) -> (H*W, 3); images here are 260x210
                    rows.append(data.reshape(-1, 3))
            if not rows:
                continue
            res = np.concatenate(rows, axis=0)
            # Center on the precomputed channel means, then scale by 255
            res = (res - m) / 255
            transformer.partial_fit(res)

            if not c % 100:
                print(str(c) + '/' + str(len(recordings)))

        pickle.dump(transformer, open("pca.p", "wb"))
        print(transformer.get_covariance())
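
The pickled transformer can be reloaded later and applied to new pixels, as long as they receive the same preprocessing used during fitting; a short sketch:

import pickle

import numpy as np

with open("pca.p", "rb") as f:
    transformer = pickle.load(f)

# Same preprocessing as in the fit: subtract the precomputed channel
# means, then divide by 255.
m = [134.09352525641472, 131.9404211385675, 129.67342747136797]
pixels = np.random.rand(260 * 210, 3) * 255  # stand-in for real image data
projected = transformer.transform((pixels - m) / 255)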
Example #7
import numpy as np
from numpy.testing import assert_almost_equal, assert_array_almost_equal
from sklearn import datasets
from sklearn.decomposition import PCA, IncrementalPCA

iris = datasets.load_iris()


def test_incremental_pca():
    """Incremental PCA on dense arrays."""
    X = iris.data
    batch_size = X.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    pca = PCA(n_components=2)
    pca.fit_transform(X)

    X_transformed = ipca.fit_transform(X)

    np.testing.assert_equal(X_transformed.shape, (X.shape[0], 2))
    assert_almost_equal(ipca.explained_variance_ratio_.sum(),
                        pca.explained_variance_ratio_.sum(), 1)

    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision), np.eye(X.shape[1]))