def test_compare_scikit_learn():
    """Compare the project PCA against scikit-learn's PCA on two data sets.

    Singular values must agree directly. The explained-variance ratio from
    scikit-learn is multiplied by 100 before being compared with ``ev_ratio``.
    """
    # Case 1: a tiny hand-written matrix, compared at the default precision.
    toy = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    reference = sklPCA(n_components=2)
    reference.fit(toy)

    candidate = PCA(NDDataset(toy))
    candidate.printev(n_pc=2)

    assert_array_almost_equal(candidate.sv.data, reference.singular_values_)
    assert_array_almost_equal(
        candidate.ev_ratio.data,
        reference.explained_variance_ratio_ * 100.,
    )

    # Case 2: data read from file; only the first five components are
    # compared, to 4 decimals.
    spectra = NDDataset.read('irdata/nh4y-activation.spg').data
    reference = sklPCA(n_components=5)
    reference.fit(spectra)

    # The project PCA is given its own copy of the array.
    candidate = PCA(NDDataset(spectra.copy()))
    candidate.printev(n_pc=5)

    assert_array_almost_equal(
        candidate.sv.data[:5], reference.singular_values_[:5], 4
    )
    assert_array_almost_equal(
        candidate.ev_ratio.data[:5],
        reference.explained_variance_ratio_[:5] * 100.,
        4,
    )
def PCA(data, n_components=2, desired_var_explained=None):
    """Fit a scikit-learn PCA on the values of ``data`` and return the model.

    ``data`` is iterated and indexed with each yielded key (``data[key]``);
    the resulting list of values is what gets fitted (presumably one sample
    per key -- confirm against callers).

    When ``desired_var_explained`` is not None it is handed to scikit-learn
    as ``n_components`` in place of the ``n_components`` argument.
    """
    if desired_var_explained is None:
        target = n_components
    else:
        target = desired_var_explained

    model = sklPCA(n_components=target)
    samples = [data[key] for key in data]
    # The projected samples are not used; the call is made for the fit.
    model.fit_transform(samples)
    return model
def _doPCA(self, nComps, syntheticSEDs):
    """Fit a PCA model to the synthetic spectra and keep it on the instance.

    The spectra are obtained from sedMapper.get_sed_array using the
    instance's minWavelen, maxWavelen and nWavelen; the model is stored
    as self.specPCA.

    @param nComps         number of components to keep
    @param syntheticSEDs  library of synthetic spectra
    """
    # get_sed_array yields a pair; only its second item is fitted.
    _, spectra = sedMapper.get_sed_array(
        syntheticSEDs,
        self.minWavelen,
        self.maxWavelen,
        self.nWavelen,
    )

    model = sklPCA(nComps)
    self.specPCA = model
    model.fit(spectra)
def _doPCA(self, nComps, syntheticSEDs):
    """Run PCA on the synthetic spectra and store the model as self.specPCA.

    @param nComps         number of components to keep
    @param syntheticSEDs  library of synthetic spectra
    """
    wavelength_bounds = (self.minWavelen, self.maxWavelen, self.nWavelen)
    # The first returned item is not needed here.
    _unused, sed_array = sedMapper.get_sed_array(syntheticSEDs, *wavelength_bounds)

    self.specPCA = sklPCA(nComps)
    self.specPCA.fit(sed_array)
def _doPCA(self, ncomp, spectra):
    """Fit a PCA to the spectra and store the decomposition on the instance.

    Sets self.meanSpec (the model's mean_), self.eigenspectra (the model's
    components_) and self.eigenvalue_coeffs (the spectra transformed by the
    fitted model), then prints the shape of each.

    @param ncomp    number of components passed to the PCA model
    @param spectra  data the PCA is fitted to and then transformed
    """
    specPCA = sklPCA(ncomp)
    specPCA.fit(spectra)

    self.meanSpec = specPCA.mean_
    self.eigenspectra = specPCA.components_
    self.eigenvalue_coeffs = np.array(specPCA.transform(spectra))

    # Single-argument print(...) parses under both Python 2 and Python 3 and
    # emits the same text as the former Python 2 print statements.
    print("Mean spectrum shape: %s" % (self.meanSpec.shape,))
    print("Eigenspectra shape: %s" % (self.eigenspectra.shape,))
    print("Eigenvalues shape: %s" % (self.eigenvalue_coeffs.shape,))
def test_compare_scikit_learn():
    """Compare the project PCA against scikit-learn's PCA on two data sets.

    Returns silently when the optional scikit-learn dependency cannot be
    imported. scikit-learn's explained-variance ratio is multiplied by 100
    before being compared with ``ev_ratio``.
    """
    try:
        import_optional_dependency("scikit-learn")
    except ImportError:
        # Nothing to compare against without scikit-learn.
        return

    from sklearn.decomposition import PCA as sklPCA

    # Case 1: a tiny hand-written matrix, compared at the default precision.
    toy = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])
    reference = sklPCA(n_components=2)
    reference.fit(toy)

    candidate = PCA(NDDataset(toy))
    candidate.printev(n_pc=2)

    assert_array_almost_equal(candidate._sv.data, reference.singular_values_)
    assert_array_almost_equal(
        candidate.ev_ratio.data,
        reference.explained_variance_ratio_ * 100.0,
    )

    # Case 2: data read from file; each implementation works on its own copy
    # and only the first five components are compared, to 4 decimals.
    dataset = NDDataset.read("irdata/nh4y-activation.spg")

    raw_values = dataset.copy().data
    reference = sklPCA(n_components=5, svd_solver="full")
    reference.fit(raw_values)

    candidate = PCA(NDDataset(dataset.copy()))
    candidate.printev(n_pc=5)

    assert_array_almost_equal(
        candidate._sv.data[:5], reference.singular_values_[:5], 4
    )
    assert_array_almost_equal(
        candidate.ev_ratio.data[:5],
        reference.explained_variance_ratio_[:5] * 100.0,
        4,
    )

    show()
def _doPCA(self, ncomp, spectra):
    """PCA the SED set in array

    Stores the fitted model's mean_ as self.meanSpec, its components_ as
    self.eigenspectra and the transformed spectra as self.eigenvalue_coeffs,
    then prints the shape of each.

    @param ncomp    number of principal components to keep
    @param spectra  array of SEDs
    """
    specPCA = sklPCA(ncomp)
    specPCA.fit(spectra)

    self.meanSpec = specPCA.mean_
    self.eigenspectra = specPCA.components_
    self.eigenvalue_coeffs = np.array(specPCA.transform(spectra))

    # Single-argument print(...) parses under both Python 2 and Python 3 and
    # emits the same text as the former Python 2 print statements.
    report = (
        ("Mean spectrum", self.meanSpec),
        ("Eigenspectra", self.eigenspectra),
        ("Eigenvalues", self.eigenvalue_coeffs),
    )
    for label, values in report:
        print("%s shape: %s" % (label, values.shape))
graph.annotate(fig, features_label, (.55, .95)) return fig, AX if __name__ == '__main__': from datavyz import ge # LOADING THE DATA from sklearn.datasets import load_breast_cancer data = load_breast_cancer() # PERFORMING PCA from sklearn.decomposition import PCA as sklPCA pca = sklPCA(n_components=4) pca.fit_transform(data['data']) # PLOT fig, AX = ge.components_plot(pca.components_) ge.savefig(fig, 'docs/components-plot.png') ge.show() from sklearn.datasets import load_iris dataset = load_iris() fig, ax = ge.parallel_plot( dataset['data'], SET_OF_LABELS=[ 'sepal length\n(cm)', 'sepal width\n(cm)', 'petal length\n(cm)', 'petal width\n(cm)' ],
def PCA(Xtrain, Xtest, n_components=2):
    """Project train and test data with a PCA fitted on the train data only.

    The model is fitted on ``Xtrain`` and that same fitted model is then
    applied to ``Xtest``.

    Returns the pair ``(transformed Xtrain, transformed Xtest)``.
    """
    reducer = sklPCA(n_components)
    train_projected = reducer.fit_transform(Xtrain)
    test_projected = reducer.transform(Xtest)
    return train_projected, test_projected
features = nb.loc[i, 'ActMod_trqClth'].tolist() nbLetters = sum(features) # nb of appearance of symbols for engine speed features = features + nb.loc[i, 'Epm_nEng'].tolist() # nb of appearance of symbols for pedal position features = features + nb.loc[i, 'APP_r'].tolist() # duration of hoop features = (np.array(features) / nbLetters).tolist() features = features + [ listValuesHoops[i].loc[listValuesHoops[i].index[-1], 'time'] - listValuesHoops[i].loc[listValuesHoops[i].index[0], 'time'] ] featuresMatrix.append(features) return pd.DataFrame.from_records(featuresMatrix) from sklearn.decomposition import PCA as sklPCA from sklearn.preprocessing import MinMaxScaler from mpl_toolkits.mplot3d import Axes3D test = getFeatures(listHoops, listValuesHoops) # We run PCA sklPCA = sklPCA(n_components=8) test2 = MinMaxScaler().fit_transform(test) output = sklPCA.fit_transform(test2) fig = plt.figure() ax = fig.add_subplot(111, projection='3d') ax.scatter(output[:, 0], output[:, 1], output[:, 2], c='b', marker='o') plt.show()