Example #1
    def _train(self, fea_dir: str) -> None:
        """
        Train the part PCA.

        Args:
            fea_dir (str): the path of features for training part PCA.
        """
        fea, _, pos_info = feature_loader.load(fea_dir, self.feature_names)
        fea_names = list(pos_info.keys())
        ori_dim = fea.shape[1]

        already_proj_dim = 0
        for fea_name in fea_names:
            st_idx, ed_idx = pos_info[fea_name][0], pos_info[fea_name][1]
            ori_part_dim = ed_idx - st_idx
            if self._hyper_params["proj_dim"] == 0:
                proj_part_dim = ori_part_dim
            else:
                ratio = self._hyper_params["proj_dim"] * 1.0 / ori_dim
                if fea_name != fea_names[-1]:
                    proj_part_dim = int(ori_part_dim * ratio)
                else:
                    proj_part_dim = (self._hyper_params["proj_dim"]
                                     - already_proj_dim)
                    assert proj_part_dim <= ori_part_dim, \
                        "the reduction dimension cannot be distributed across the parts!"
                already_proj_dim += proj_part_dim

            pca = SKPCA(n_components=proj_part_dim,
                        whiten=self._hyper_params["whiten"])
            train_fea = fea[:, st_idx:ed_idx]
            if self._hyper_params["l2"]:
                train_fea = normalize(train_fea, norm="l2")
            pca.fit(train_fea)
            self.pcas[fea_name] = {"pos": (st_idx, ed_idx), "pca": pca}
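
# A minimal standalone sketch (hypothetical numbers, not from the source repo)
# of how _train splits proj_dim across feature parts: every part but the last
# is scaled by proj_dim / ori_dim, and the last part absorbs the remainder so
# the per-part dimensions sum exactly to proj_dim.
pos_info = {"fc": (0, 512), "pool5": (512, 768)}  # hypothetical feature layout
ori_dim, proj_dim, already = 768, 256, 0
names = list(pos_info)
for name in names:
    st, ed = pos_info[name]
    part_dim = ed - st
    if name != names[-1]:
        k = int(part_dim * proj_dim / ori_dim)    # fc: 512 * 256/768 -> 170
    else:
        k = proj_dim - already                    # pool5: 256 - 170 -> 86
    already += k
    print(name, k)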
Example #2
    def check_components(Estimator, n_components, shape):
        X = DATA[shape]

        pca = Estimator(n_components, **KWDS[Estimator]).fit(X)
        skpca = SKPCA(n_components).fit(X)

        assert_columns_allclose_upto_sign(pca.components_.T,
                                          skpca.components_.T)
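
# assert_columns_allclose_upto_sign is not shown in these snippets; a plausible
# minimal version (an assumption, not the project's actual helper): PCA
# eigenvector signs are arbitrary, so each column is compared up to a flip.
import numpy as np

def assert_columns_allclose_upto_sign(A, B, rtol=1e-7, atol=1e-12):
    for a, b in zip(np.asarray(A).T, np.asarray(B).T):
        sign = 1.0 if np.dot(a, b) >= 0 else -1.0   # pick the matching sign
        np.testing.assert_allclose(a, sign * b, rtol=rtol, atol=atol)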
Example #3
    def __init__(self, n_components=2, **kwargs):
        """Init.

        Args:
            n_components(int): Number of principal components to keep.
            **kwargs: Any remaining keyword arguments are passed to the sklearn PCA class.
        """
        self.comet_exp = None
        self.estimator = SKPCA(n_components=n_components, **kwargs)
Example #4
    def check_transform(Estimator, n_components, shape):
        X = DATA[shape]

        pca = Estimator(n_components, **KWDS[Estimator])
        skpca = SKPCA(n_components)

        Y = pca.fit_transform(X)
        Ysk = skpca.fit_transform(X)
        assert_columns_allclose_upto_sign(Y, Ysk)
Example #5
    def check_explained_variance(Estimator, n_components, shape):
        X = DATA[shape]

        pca = Estimator(n_components, **KWDS[Estimator]).fit(X)
        skpca = SKPCA(n_components).fit(X)

        assert_allclose(pca.explained_variance_, skpca.explained_variance_)
        assert_allclose(pca.explained_variance_ratio_,
                        skpca.explained_variance_ratio_)
Example #6
    def __init__(self, feature_names: List[str], hps: Optional[Dict] = None):
        """
        Args:
            feature_names (list): a list of feature names to be loaded.
            hps (dict): default hyper parameters in a dict (keys, values).
        """
        super(PCA, self).__init__(feature_names, hps)

        self.pca = SKPCA(n_components=self._hyper_params["proj_dim"],
                         whiten=self._hyper_params["whiten"])
        self._train(self._hyper_params["train_fea_dir"])
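
# Hypothetical usage sketch: the keys this class reads from its
# hyper-parameter dict, inferred from the code above (values and path
# are made up for illustration).
hps = {
    "proj_dim": 128,        # 0 keeps each part's original dimensionality
    "whiten": True,
    "l2": True,             # L2-normalize features before fitting (see _train)
    "train_fea_dir": "/path/to/train/features",   # hypothetical path
}
pca_stage = PCA(feature_names=["fc", "pool5"], hps=hps)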
Example #7
    def __init__(self, feature_names, hps):
        """
        Args:
            feature_names (list): a list of feature names to be loaded.
            hps (dict): default hyper parameters in a dict (keys, values).
        """
        super(PCA, self).__init__()
        self.feature_names = feature_names
        self.hps = hps
        self.pca = SKPCA(n_components=self.hps["proj_dim"],
                         whiten=self.hps["whiten"])
        self._train(self.hps["train_fea_dir"])
Example #8
    def init_estimator(self, x):
        """Init estimator by adding a PCA step if need be.

        Args:
            x(BaseDataset): Dataset to fit.

        """
        steps = [self.umap_estimator]
        if x.shape[1] > 100 and x.shape[0] > 1000:
            # Note : PHATE does a PCA step by default. See their doc.
            print(
                'More than 100 dimensions and 1000 samples. Adding PCA step to UMAP pipeline.'
            )
            steps = [SKPCA(n_components=100)] + steps

        self.estimator = make_pipeline(*steps)
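
# Minimal usage sketch (assumes the umap-learn package; `X_train` is
# hypothetical): wide, large data gets a 100-component PCA step before UMAP,
# mirroring the branch above.
import numpy as np
from sklearn.decomposition import PCA as SKPCA
from sklearn.pipeline import make_pipeline
from umap import UMAP

X_train = np.random.rand(2000, 300)               # >1000 samples, >100 dims
pipe = make_pipeline(SKPCA(n_components=100), UMAP(n_components=2))
embedding = pipe.fit_transform(X_train)           # shape: (2000, 2)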
Example #9
def percentage2n(val, percent):  # hypothetical name; the def line is missing from the snippet
    '''
    Compute the number of dimensions n that PCA should keep, based on the
    fraction of information to retain.
    params:
        val      eigenvalues
        percent  fraction of variance to retain (e.g. 0.95)
    return:
        n        number of dimensions to keep
    '''
    sortArr = np.sort(val)[::-1]   # eigenvalues in descending order
    ArrSum = np.sum(sortArr)
    tmp = 0
    n = 0
    for x in sortArr:
        tmp += x
        n += 1
        if tmp >= ArrSum * percent:
            return n
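

def demo_percentage2n():
    # Usage sketch (hypothetical data, not part of the original script):
    # percentage2n expects the eigenvalues of the covariance matrix and
    # returns how many components retain the requested share of variance.
    data = np.random.rand(100, 5)
    cov = np.cov(data, rowvar=False)
    eigvals = np.linalg.eigvalsh(cov)   # eigenvalues of the symmetric cov
    return percentage2n(eigvals, 0.95)  # e.g. keep 95% of the variance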


if __name__ == "__main__":
    X = np.array([[-1, -1, 0, 2, 1], [2, 0, 0, -1, -1], [2, 0, 1, 1, 0]])
    X_ = PCA(X, 2)
    print("自定义PCA\n", X_)

    # PCA by Scikit-learn
    pca = SKPCA(n_components=2)
    pca.fit(X)  #输入数据
    print('PCA Scikit-learn')
    print(pca.fit_transform(X))
# In[7]:


## Some preprocessing of the data
NUM_DATAPOINTS = 1000
X = (images.reshape(-1, 28 * 28)[:NUM_DATAPOINTS]) / 255.
Xbar, mu, std = normalize(X)
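
# `normalize` here is the notebook's own helper (not sklearn's). A sketch of
# what it presumably does, standardizing each pixel with a guard for
# zero-variance pixels (an assumption, not the notebook's exact code):
def normalize_sketch(X):
    mu = X.mean(axis=0)
    std = X.std(axis=0)
    std_filled = np.where(std == 0, 1.0, std)   # avoid division by zero
    return (X - mu) / std_filled, mu, std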


# In[8]:


from sklearn.decomposition import PCA as SKPCA

for num_component in range(1, 20):
    # Compute a reference solution with scikit-learn's implementation of PCA
    pca = SKPCA(n_components=num_component, svd_solver='full')
    sklearn_reconst = pca.inverse_transform(pca.fit_transform(Xbar))
    reconst = PCA(Xbar, num_component)
    np.testing.assert_almost_equal(reconst, sklearn_reconst)
    print(np.square(reconst - sklearn_reconst).sum())


# The greater the number of principal components we use, the smaller our
# reconstruction error will be. Now, let's answer the following question:
# 
# 
# > How many principal components do we need
# > in order to reach a Mean Squared Error (MSE) of less than $100$ for our dataset?
# 
#
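# A minimal sketch of one way to check, assuming MSE is the squared
# reconstruction error per sample averaged over samples, and that
# `PCA(Xbar, k)` returns the reconstruction as above:

# In[9]:


for num_component in range(1, 200):
    reconst = PCA(Xbar, num_component)
    mse = np.square(reconst - Xbar).sum(axis=1).mean()
    if mse < 100:
        print(f"MSE {mse:.2f} < 100 reached with {num_component} components")
        break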