Exemple #1
0
def test_incremental_pca_sparse(matrix_class):
    # Incremental PCA on sparse arrays.
    X = iris.data
    pca = PCA(n_components=2)
    pca.fit_transform(X)
    X_sparse = matrix_class(X)
    batch_size = X_sparse.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)

    X_transformed = ipca.fit_transform(X_sparse)

    assert X_transformed.shape == (X_sparse.shape[0], 2)
    np.testing.assert_allclose(ipca.explained_variance_ratio_.sum(),
                               pca.explained_variance_ratio_.sum(),
                               rtol=1e-3)

    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X_sparse)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        np.testing.assert_allclose(np.dot(cov, precision),
                                   np.eye(X_sparse.shape[1]),
                                   atol=1e-13)

    with pytest.raises(TypeError,
                       match="IncrementalPCA.partial_fit does not support "
                       "sparse input. Either convert data to dense "
                       "or use IncrementalPCA.fit to do so in batches."):
        ipca.partial_fit(X_sparse)
def test_incremental_pca():
    # Incremental PCA on dense arrays.
    X = iris.data
    batch_size = X.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    pca = PCA(n_components=2)
    pca.fit_transform(X)

    X_transformed = ipca.fit_transform(X)

    assert X_transformed.shape == (X.shape[0], 2)
    np.testing.assert_allclose(
        ipca.explained_variance_ratio_.sum(),
        pca.explained_variance_ratio_.sum(),
        rtol=1e-3,
    )

    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        np.testing.assert_allclose(
            np.dot(cov, precision), np.eye(X.shape[1]), atol=1e-13
        )
Exemple #3
0
class IPCA(object):
    def __init__(self,
                 n_components=None,
                 whiten=False,
                 copy=True,
                 batch_size=None):
        """
        :param n_components:   default为None ,int 或None, 想要保留的分量数,None 时,
        min(n_samples, n_features)
        :param whiten:   bool型,可选项, 默认为False, 当true(默认情况下为false)时,components_ 向量除以
        n_samples*components_以确保具有单位组件级方差的不相关输出。
        :param copy: 默认为True,  False时,x 将被覆盖,将节约能存,但存在不安全
        :param batch_size: default None, 批量样本数,   只在fit 中使用,设为None,系统自动设成5*n_features,
        以保持经度与内存开销的平衡
        """
        self.model = IncrementalPCA(n_components=n_components,
                                    whiten=whiten,
                                    copy=copy,
                                    batch_size=batch_size)

    def fit(self, x, y=None):
        self.model.fit(X=x, y=y)

    def transform(self, x):
        return self.model.transform(X=x)

    def fit_transform(self, x, y=None):
        return self.model.fit_transform(X=x, y=y)

    def get_params(self, deep=True):  # 获取评估器的参数
        return self.model.get_params(deep=deep)

    def set_params(self, **params):  # 设置评估器的参数
        self.model.set_params(**params)

    def inverse_transform(self, x):  # 与 fit_tansform 刚好相反的两个操作
        return self.model.inverse_transform(X=x)

    def get_precision(self):  # 根据生成模型计算精度矩阵
        return self.model.get_precision()

    def get_covariance(self):  # 根据生成模型获取协方差
        return self.model.get_covariance()

    def partial_fit(self, x, y=None, check_input=True):  # 增量训练
        self.model.partial_fit(X=x, y=y, check_input=check_input)

    def get_attributes(self):
        component = self.model.components_
        explained_variance = self.model.explained_variance_
        explained_variance_ratio = self.model.explained_variance_ratio_
        singular_values = self.model.singular_values_
        means = self.model.mean_  # 每个特征的均值
        var = self.model.var_  # 每个特征的方差
        noise_variance = self.model.noise_variance_  # 评估的噪声协方差
        n_component = self.model.n_components_
        n_samples_seen = self.model.n_samples_seen_
        return component, explained_variance, explained_variance_ratio, singular_values, means, var, noise_variance, \
               n_component, n_samples_seen
Exemple #4
0
def test_incremental_pca():
    """Incremental PCA on dense arrays."""
    X = iris.data
    batch_size = X.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    pca = PCA(n_components=2)
    pca.fit_transform(X)

    X_transformed = ipca.fit_transform(X)

    np.testing.assert_equal(X_transformed.shape, (X.shape[0], 2))
    assert_almost_equal(ipca.explained_variance_ratio_.sum(),
                        pca.explained_variance_ratio_.sum(), 1)

    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision), np.eye(X.shape[1]))
def test_incremental_pca():
    """Incremental PCA on dense arrays."""
    X = iris.data
    batch_size = X.shape[0] // 3
    ipca = IncrementalPCA(n_components=2, batch_size=batch_size)
    pca = PCA(n_components=2)
    pca.fit_transform(X)

    X_transformed = ipca.fit_transform(X)

    np.testing.assert_equal(X_transformed.shape, (X.shape[0], 2))
    assert_almost_equal(ipca.explained_variance_ratio_.sum(),
                        pca.explained_variance_ratio_.sum(), 1)

    for n_components in [1, 2, X.shape[1]]:
        ipca = IncrementalPCA(n_components, batch_size=batch_size)
        ipca.fit(X)
        cov = ipca.get_covariance()
        precision = ipca.get_precision()
        assert_array_almost_equal(np.dot(cov, precision),
                                  np.eye(X.shape[1]))
Exemple #6
0
# In[ ]:

# In[16]:

# IncrementalPCA
ipca = IncrementalPCA(n_components=3)
print(ipca)
ipca.fit(feature)
x = ipca.transform(feature)
print(ipca.explained_variance_ratio_)

# In[17]:

# ipca.get_covariance()
ipca.get_precision()
ipca.get_params(deep=True)

# ## Gaussian random projection

# In[27]:

from sklearn import random_projection

transform = random_projection.GaussianRandomProjection(n_components=5)
feature_new = transform.fit_transform(feature)
classifier_f = open('n_GaussianRandomProjection.pickle', 'wb')
pickle.dump(feature_new, classifier_f)
classifier_f.close()
print(feature_new)