def pca_eigenvalues(x_adult, x_wine, n_components=10):
    """Plot the leading eigenvalues of the Adult and Wine-reviews data.

    Three curves are drawn on one figure: the PCA explained variance of
    ``x_adult``, the PCA explained variance of ``x_wine``, and the MCA
    eigenvalues of ``x_wine`` multiplied by 100 (the scaling is stated in the
    legend).

    Args:
        x_adult: Data handed to ``PCA.fit`` for the "Adult" curve.
        x_wine: Data handed to both ``PCA.fit`` and ``MCA.fit`` for the two
            "Wine reviews" curves.
        n_components: Number of components requested from every
            decomposition, and therefore the number of points per curve.
            Defaults to 10.

    Returns:
        None. The figure is displayed with ``plot.show()``.
    """
    # NOTE(review): assumes `plot` is matplotlib.pyplot - confirm against the
    # file's imports. Matplotlib 3.6 renamed the bundled seaborn style sheets
    # to 'seaborn-v0_8-*' and later releases dropped the old names, so try the
    # new name first and fall back to the legacy one.
    style_candidates = ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid')

    # Silence FutureWarning only for the duration of this call; the previous
    # bare simplefilter() permanently changed the process-wide filter list.
    with warnings.catch_warnings():
        warnings.simplefilter(action='ignore', category=FutureWarning)

        pca = PCA(n_components=n_components)
        pca.fit(x_adult)
        y_adult = pca.explained_variance_

        pca = PCA(n_components=n_components)
        pca.fit(x_wine)
        y_wine = pca.explained_variance_

        mca = MCA(n_components=n_components)
        mca.fit(x_wine)
        # Scaled by 100 so the MCA curve is readable next to the PCA curves.
        y_wine2 = 100 * np.array(mca.eigenvalues_)

        # 1-based component indices for the x axis.
        x_axis = list(range(1, n_components + 1))

        style = next(
            (name for name in style_candidates if name in plot.style.available),
            None,
        )
        if style is not None:
            plot.style.use(style)

        plot.title('Eigen values distributions')
        plot.xlabel('Eigen value index')
        plot.ylabel('Eigen value')
        plot.xticks(x_axis, x_axis)
        # Transposing gives one column per series, so a single plot() call
        # draws all three curves.
        plot.plot(x_axis, np.transpose([y_adult, y_wine, y_wine2]), 'o-')
        plot.legend(['Adult', 'Wine reviews (PCA)', 'Wine reviews (MCA) x100'],
                    loc='upper right')
        plot.show()
# Example #2
# 0
class DFMCA(BaseEstimator, TransformerMixin):
    """DataFrame-in / DataFrame-out wrapper around ``MCA``.

    The selected columns are replaced by the MCA row coordinates (renamed
    with ``prefix``); every other column of the input frame is passed through
    unchanged.

    Args:
        columns: Column names to feed to MCA. ``None`` means "all columns of
            the frame passed to ``fit``".
        prefix: String prepended to every column name produced by MCA.
        **kwargs: Forwarded verbatim to the ``MCA`` constructor.

    Attributes set by ``fit``:
        transform_cols: Columns of the fitted frame that were fed to MCA, in
            the frame's own column order.
        stat_df: One row per MCA dimension with its eigenvalue, explained
            inertia and cumulative explained inertia.
    """
    # NOTE:
    # - Use DFMCA(n_components=df[columns].apply(lambda x: len(x.unique())).sum())
    #   to retain every dimension.
    # - Convert binary-encoded features to strings first, so that prince.MCA()
    #   generates new one-hot encoded features by calling pd.get_dummies().

    def __init__(self, columns=None, prefix='mca_', **kwargs):
        self.columns = columns
        self.prefix = prefix
        self.model = MCA(**kwargs)
        self.transform_cols = None
        self.stat_df = None

    def fit(self, X, y=None):
        """Fit MCA on the selected columns of ``X`` and build ``stat_df``.

        Args:
            X: DataFrame containing the columns to decompose.
            y: Ignored; accepted for pipeline compatibility.

        Returns:
            self
        """
        # Resolve the selection locally instead of overwriting self.columns:
        # mutating the constructor parameter made a later fit() on a frame
        # with different columns silently reuse the stale column set.
        selected = X.columns if self.columns is None else self.columns
        self.transform_cols = [col for col in X.columns if col in selected]
        self.model.fit(X[self.transform_cols])

        # Reference: https://www.appliedaicourse.com/lecture/11/applied-machine-learning-online-course/2896/pca-for-dimensionality-reduction-not-visualization/0/free-videos
        # NOTE(review): relies on the MCA object exposing `eigenvalues_` and
        # `explained_inertia_` - confirm against the installed version.
        eigenvalues = self.model.eigenvalues_
        explained_inertia = self.model.explained_inertia_
        self.stat_df = pd.DataFrame({
            'dimension': list(range(1, len(eigenvalues) + 1)),
            'eigenvalues': eigenvalues,
            'explained_inertia': explained_inertia,
            'cumsum_explained_inertia': np.cumsum(explained_inertia),
        })

        return self

    def transform(self, X):
        """Replace the fitted columns of ``X`` with prefixed MCA coordinates.

        Args:
            X: DataFrame containing at least the columns seen during ``fit``.

        Returns:
            A new DataFrame: ``X`` without the fitted columns, followed by the
            MCA output columns renamed to ``f'{prefix}{name}'``.

        Raises:
            NotFittedError: If ``fit`` has not been called yet.
        """
        if self.transform_cols is None:
            raise NotFittedError(f"This {self.__class__.__name__} instance is not fitted yet. Call 'fit' with appropriate arguments before using this estimator.")

        coords = self.model.transform(X[self.transform_cols])
        # Non-mutating rename: leave the frame returned by the model untouched.
        coords = coords.rename(columns=lambda name: f'{self.prefix}{name}')

        # Untouched columns first, MCA coordinates appended on the right.
        return pd.concat([X.drop(columns=self.transform_cols), coords], axis=1)

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return its transformed frame in one call."""
        return self.fit(X, y).transform(X)