def optimize_components(X, feature_names, label, abbrev):
    """Choose the number of NMF components by reconstruction explained variance.

    An NMF model is fitted on ``X`` for every rank from 1 to
    ``len(feature_names)``. Each fit is scored with
    ``explained_variance_score`` between ``X`` and its reconstruction
    ``inverse_transform(transform(X))``. The smallest rank whose score is
    at least 0.95 is selected. If no rank reaches 0.95, the best-scoring
    rank is selected instead and a message saying so is printed.

    The score curve and the chosen rank are plotted, saved to
    ``PLOT_DIR/<abbrev>_nmf_components.png``, shown, and closed.

    Args:
        X: Data passed to ``NMF.fit``.
        feature_names: Sized collection; only its length is used, as the
            largest rank to try.
        label: Prefix for the printed message and the plot title.
        abbrev: Prefix of the saved plot's file name.

    Returns:
        The selected number of components (an element of the NumPy range
        of candidate ranks).

    Raises:
        ValueError: If ``feature_names`` is empty, so there is no rank to try.
    """
    # model selection (optimal number of components)
    # from https://scikit-learn.org/stable/auto_examples/decomposition/plot_pca_vs_fa_model_selection.html
    threshold = 0.95

    # prepare explained variance scorer
    def get_score(model, data, scorer=explained_variance_score):
        prediction = model.inverse_transform(model.transform(data))
        return scorer(data, prediction)

    # choose number of components by explained variance
    n_components = np.arange(1, len(feature_names) + 1)
    if n_components.size == 0:
        raise ValueError("feature_names is empty: no candidate n_components to evaluate")

    nmf = NMF(random_state=SEED)
    nmf_scores = []
    for n in n_components:
        nmf.n_components = n
        nmf.fit(X)
        nmf_scores.append(get_score(nmf, X))
    nmf_scores = np.array(nmf_scores)

    reached = nmf_scores >= threshold
    if reached.any():
        # argmax on a boolean mask yields the first True, i.e. the smallest
        # rank that meets the threshold.
        n_components_nmf = n_components[np.argmax(reached)]
        print(label + ": best n_components by explained variance > 0.95 = %d" % int(n_components_nmf))
    else:
        # argmax on an all-False mask would return 0 and silently report a
        # single component as if it met the threshold; use the best rank.
        n_components_nmf = n_components[np.argmax(nmf_scores)]
        print(label + ": explained variance never reaches 0.95; using best-scoring n_components = %d"
              % int(n_components_nmf))

    # create plot
    plt.figure()
    plt.plot(n_components, nmf_scores, 'b', label='explained variance by num/components')
    plt.axvline(n_components_nmf, color='b',
                label='chosen number of components: %d' % n_components_nmf, linestyle='--')

    # format plot
    ax = plt.gca()
    ax.xaxis.set_major_locator(MaxNLocator(integer=True))
    plt.xlabel('number of components')
    plt.ylabel('explained variance')
    plt.legend(loc='lower right')
    plt.title(label + ": NMF model selection")

    plt.savefig(path.join(PLOT_DIR, abbrev + "_nmf_components.png"), bbox_inches='tight')
    plt.show()
    plt.close()

    return n_components_nmf
def compute_scores(X):
    """Score PCA, FastICA, NMF and KernelPCA reconstructions per rank.

    For every value in ``n_components`` the four estimators are set to
    that rank, refitted, and scored with ``explained_variance_score``
    between the input and its ``inverse_transform(fit_transform(...))``
    reconstruction. The current rank is printed on each iteration.

    NOTE(review): ``n_components`` and ``Xs`` are not defined in this
    function, so they must exist in an enclosing or module scope. PCA,
    FastICA and KernelPCA are fitted on ``Xs``; only NMF is fitted on the
    ``X`` argument. ``Xs`` is presumably a scaled copy of ``X``; confirm
    against the caller.

    Args:
        X: Data used for the NMF fit.

    Returns:
        A tuple ``(pca_scores, ica_scores, nmf_scores, kpca_scores)`` of
        lists with one score per entry of ``n_components``.
    """
    def reconstruction_score(model, data):
        # Fit, project, map back, then compare with the input.
        restored = model.inverse_transform(model.fit_transform(data))
        return explained_variance_score(data, restored)

    pca = PCA(svd_solver='auto')
    kpca = KernelPCA(fit_inverse_transform=True)
    ica = FastICA()
    nmf = NMF(init='nndsvda')

    pca_scores = []
    ica_scores = []
    nmf_scores = []
    kpca_scores = []

    for rank in n_components:
        for estimator in (pca, ica, nmf, kpca):
            estimator.n_components = rank
        print(rank)

        # Fit order is kept as PCA, ICA, KernelPCA, NMF so that any shared
        # random state is consumed in the same sequence.
        pca_scores.append(reconstruction_score(pca, Xs))
        ica_scores.append(reconstruction_score(ica, Xs))
        kpca_scores.append(reconstruction_score(kpca, Xs))
        nmf_scores.append(reconstruction_score(nmf, X))

    return pca_scores, ica_scores, nmf_scores, kpca_scores