def pca_eigenvalues(x_adult, x_wine, n_components=10):
    """Plot the leading eigenvalues of the Adult and Wine-reviews data.

    Three curves are drawn on a single figure:

    * ``PCA.explained_variance_`` fitted on ``x_adult``;
    * ``PCA.explained_variance_`` fitted on ``x_wine``;
    * ``MCA.eigenvalues_`` fitted on ``x_wine``, multiplied by 100 so the
      curve is readable on the same axis as the PCA curves.

    Args:
        x_adult: Data handed unchanged to ``PCA.fit`` for the Adult curve.
        x_wine: Data handed unchanged to both ``PCA.fit`` and ``MCA.fit``
            for the two Wine-reviews curves.
        n_components: Number of components requested from every model and
            number of points on the x axis. Defaults to 10, the value that
            used to be hard-coded.

    Returns:
        None. The figure is displayed with ``plot.show()``.

    Side effects:
        Installs a process-wide ``ignore`` filter for ``FutureWarning``;
        the filter is intentionally left in place after the call, as before.
    """
    warnings.simplefilter(action='ignore', category=FutureWarning)

    pca = PCA(n_components=n_components)
    pca.fit(x_adult)
    y_adult = pca.explained_variance_

    pca = PCA(n_components=n_components)
    pca.fit(x_wine)
    y_wine = pca.explained_variance_

    mca = MCA(n_components=n_components)
    mca.fit(x_wine)
    # Scaled by 100 purely for display; the legend entry says so.
    y_wine2 = 100 * np.array(mca.eigenvalues_)

    # 1-based eigenvalue indices used for both tick positions and labels.
    x_axis = list(range(1, n_components + 1))

    # NOTE(review): `plot` is presumably matplotlib.pyplot. Matplotlib 3.6
    # renamed its bundled seaborn styles to 'seaborn-v0_8-*' and later
    # releases reject the old names, so try the new name first and fall back
    # to the legacy one on older installations.
    try:
        plot.style.use('seaborn-v0_8-darkgrid')
    except OSError:
        plot.style.use('seaborn-darkgrid')

    plot.title('Eigen values distributions')
    plot.xlabel('Eigen value index')
    plot.ylabel('Eigen value')
    plot.xticks(x_axis, x_axis)
    # Transpose so that each column is one curve.
    plot.plot(x_axis, np.transpose([y_adult, y_wine, y_wine2]), 'o-')
    plot.legend(
        ['Adult', 'Wine reviews (PCA)', 'Wine reviews (MCA) x100'],
        loc='upper right',
    )
    plot.show()
class DFMCA(BaseEstimator, TransformerMixin):
    """DataFrame-in / DataFrame-out wrapper around ``MCA``.

    The selected columns are replaced by the MCA output columns, each
    renamed with ``prefix``; every other column of the input is kept.

    Usage notes (from the original author):

    * ``DFMCA(n_components=df[columns].apply(lambda x: len(x.unique())).sum())``
      keeps every dimension.
    * Convert binary-encoded features to strings first, so that
      ``prince.MCA()`` generates new one-hot encoded features by calling
      ``pd.get_dummies()``.

    Args:
        columns: Columns to feed to MCA. ``None`` means "all columns of the
            frame passed to ``fit``".
        prefix: String prepended to every column name returned by the model.
        **kwargs: Forwarded unchanged to ``MCA(...)``.

    Attributes set by ``fit``:
        transform_cols: Columns of the fitted frame that were selected, in
            the frame's own column order. ``None`` until ``fit`` is called.
        stat_df: One row per dimension with ``dimension`` (1-based),
            ``eigenvalues``, ``explained_inertia`` and
            ``cumsum_explained_inertia``. ``None`` until ``fit`` is called.
    """

    def __init__(self, columns=None, prefix='mca_', **kwargs):
        self.columns = columns
        self.prefix = prefix
        # NOTE(review): the model is built eagerly from **kwargs, which are
        # not kept as attributes - confirm this estimator is never cloned
        # with the expectation that those keyword arguments survive.
        self.model = MCA(**kwargs)
        self.transform_cols = None
        self.stat_df = None

    def fit(self, X, y=None):
        """Fit the wrapped MCA model on the selected columns of ``X``.

        Args:
            X: DataFrame containing (at least) the selected columns.
            y: Ignored; accepted for pipeline compatibility.

        Returns:
            self
        """
        # Resolve the selection into a local instead of overwriting
        # ``self.columns``: the constructor argument must stay as given so a
        # later ``fit`` on a frame with different columns is not silently
        # tied to the columns of the first frame.
        selected = X.columns if self.columns is None else self.columns
        self.transform_cols = [col for col in X.columns if col in selected]
        self.model.fit(X[self.transform_cols])

        # Reference: https://www.appliedaicourse.com/lecture/11/applied-machine-learning-online-course/2896/pca-for-dimensionality-reduction-not-visualization/0/free-videos
        eigenvalues = self.model.eigenvalues_
        explained_inertia = self.model.explained_inertia_
        self.stat_df = pd.DataFrame({
            'dimension': list(range(1, len(eigenvalues) + 1)),
            'eigenvalues': eigenvalues,
            'explained_inertia': explained_inertia,
            'cumsum_explained_inertia': np.cumsum(explained_inertia),
        })
        return self

    def transform(self, X):
        """Replace the fitted columns of ``X`` with prefixed MCA columns.

        Args:
            X: DataFrame containing the columns recorded by ``fit``.

        Returns:
            A new DataFrame: ``X`` without the fitted columns, followed by
            the model's output columns renamed to ``f'{prefix}{name}'``.

        Raises:
            NotFittedError: If ``fit`` has not been called yet.
        """
        if self.transform_cols is None:
            raise NotFittedError(
                f"This {self.__class__.__name__} instance is not fitted yet. "
                "Call 'fit' with appropriate arguments before using this estimator."
            )

        new_X = self.model.transform(X[self.transform_cols])
        new_X = new_X.rename(columns=lambda col: f'{self.prefix}{col}')
        # Column-wise concat aligns on the index of both frames.
        return pd.concat([X.drop(columns=self.transform_cols), new_X], axis=1)

    def fit_transform(self, X, y=None):
        """Fit on ``X`` and return ``transform(X)``."""
        return self.fit(X, y).transform(X)