Esempio n. 1
0
def test_compare_scikit_learn():
    """Check that ``PCA`` reproduces the results of scikit-learn's PCA.

    Singular values and explained-variance ratios (scaled by 100 on the
    scikit-learn side) are compared twice: on a small hand-written
    two-column array, and on the first five components of the
    ``irdata/nh4y-activation.spg`` dataset (to 4 decimals).
    """
    # --- case 1: tiny hand-written array ---------------------------------
    points = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])

    reference = sklPCA(n_components=2)
    reference.fit(points)

    model = PCA(NDDataset(points))
    model.printev(n_pc=2)

    assert_array_almost_equal(model.sv.data, reference.singular_values_)
    assert_array_almost_equal(
        model.ev_ratio.data,
        reference.explained_variance_ratio_ * 100.0,
    )

    # --- case 2: dataset read from file -----------------------------------
    spectra = NDDataset.read('irdata/nh4y-activation.spg').data

    reference = sklPCA(n_components=5)
    reference.fit(spectra)

    # The model under test gets its own copy of the array.
    model = PCA(NDDataset(spectra.copy()))
    model.printev(n_pc=5)

    assert_array_almost_equal(
        model.sv.data[:5],
        reference.singular_values_[:5],
        4,
    )
    assert_array_almost_equal(
        model.ev_ratio.data[:5],
        reference.explained_variance_ratio_[:5] * 100.0,
        4,
    )
Esempio n. 2
0
File: pca.py Progetto: yzerlaut/ml
def PCA(data, n_components=2, desired_var_explained=None):
    """Fit a PCA estimator on the values of a dict-like and return it.

    Parameters
    ----------
    data : dict-like
        Iterated by key; every ``data[key]`` becomes one row of the matrix
        handed to the estimator.
    n_components : optional
        Forwarded as ``n_components`` to the estimator. Ignored when
        `desired_var_explained` is given. Defaults to 2.
    desired_var_explained : optional
        When not None, this value is forwarded as ``n_components`` instead.
        NOTE(review): presumably a fraction in (0, 1) that scikit-learn
        interprets as the target explained variance -- confirm with callers.

    Returns
    -------
    The fitted ``sklPCA`` instance.
    """
    if desired_var_explained is not None:
        n_components = desired_var_explained

    pca = sklPCA(n_components=n_components)

    # Only the fitted estimator is returned, so fit() is sufficient; the
    # projection that fit_transform() would also compute was thrown away.
    pca.fit([data[key] for key in data])

    return pca
Esempio n. 3
0
    def _doPCA(self, nComps, syntheticSEDs):
        """Fit a PCA model on the synthetic spectra and keep it on the instance.

           The fitted model is stored in ``self.specPCA``.

           @param nComps           number of components to keep
           @param syntheticSEDs    library of synthetic spectra
        """
        # get_sed_array yields a pair; only its second element is used here.
        _, sed_array = sedMapper.get_sed_array(
            syntheticSEDs,
            self.minWavelen,
            self.maxWavelen,
            self.nWavelen,
        )

        self.specPCA = sklPCA(nComps)
        self.specPCA.fit(sed_array)
 def _doPCA(self, nComps, syntheticSEDs):
     """Fit a PCA model on the synthetic spectra and keep it on the instance.

        The fitted model is stored in ``self.specPCA``.

        @param nComps           number of components to keep
        @param syntheticSEDs    library of synthetic spectra
     """
     # get_sed_array yields a pair; only its second element is used here.
     _, sed_array = sedMapper.get_sed_array(
         syntheticSEDs,
         self.minWavelen,
         self.maxWavelen,
         self.nWavelen,
     )

     self.specPCA = sklPCA(nComps)
     self.specPCA.fit(sed_array)
Esempio n. 5
0
 def _doPCA(self, ncomp, spectra):
     """Run PCA on the spectra and store the results on the instance.

        Sets ``self.meanSpec`` (the model's ``mean_``), ``self.eigenspectra``
        (its ``components_``) and ``self.eigenvalue_coeffs`` (the spectra
        transformed by the fitted model), then prints the shape of each.

        @param ncomp    first argument passed to the PCA constructor
        @param spectra  data the PCA is fitted on and then transformed
     """

     specPCA = sklPCA(ncomp)
     specPCA.fit(spectra)
     self.meanSpec = specPCA.mean_
     self.eigenspectra = specPCA.components_
     self.eigenvalue_coeffs = np.array(specPCA.transform(spectra))

     # Single-argument print with %-formatting is valid on both Python 2 and
     # Python 3 and emits the same text as the former print statements.
     print("Mean spectrum shape: %s" % (self.meanSpec.shape,))
     print("Eigenspectra shape: %s" % (self.eigenspectra.shape,))
     print("Eigenvalues shape: %s" % (self.eigenvalue_coeffs.shape,))
Esempio n. 6
0
def test_compare_scikit_learn():
    """Check that ``PCA`` reproduces the results of scikit-learn's PCA.

    Returns immediately when the optional scikit-learn dependency cannot be
    imported. Otherwise singular values and explained-variance ratios
    (scaled by 100 on the scikit-learn side) are compared on a small
    hand-written array and on the first five components of the
    ``irdata/nh4y-activation.spg`` dataset (to 4 decimals).
    """
    try:
        import_optional_dependency("scikit-learn")
    except ImportError:
        # Nothing to compare against without scikit-learn.
        return

    from sklearn.decomposition import PCA as sklPCA

    # --- case 1: tiny hand-written array ---------------------------------
    points = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]])

    reference = sklPCA(n_components=2)
    reference.fit(points)

    model = PCA(NDDataset(points))
    model.printev(n_pc=2)

    assert_array_almost_equal(model._sv.data, reference.singular_values_)
    assert_array_almost_equal(
        model.ev_ratio.data, reference.explained_variance_ratio_ * 100.0
    )

    # --- case 2: dataset read from file -----------------------------------
    loaded = NDDataset.read("irdata/nh4y-activation.spg")
    raw_values = loaded.copy().data

    reference = sklPCA(n_components=5, svd_solver="full")
    reference.fit(raw_values)

    # The model under test works on its own copy of the dataset.
    model = PCA(NDDataset(loaded.copy()))
    model.printev(n_pc=5)

    assert_array_almost_equal(
        model._sv.data[:5], reference.singular_values_[:5], 4
    )
    assert_array_almost_equal(
        model.ev_ratio.data[:5],
        reference.explained_variance_ratio_[:5] * 100.0,
        4,
    )

    show()
 def _doPCA(self, ncomp, spectra):
     """PCA the SED set in array

        Sets ``self.meanSpec`` (the model's ``mean_``), ``self.eigenspectra``
        (its ``components_``) and ``self.eigenvalue_coeffs`` (the spectra
        transformed by the fitted model), then prints the shape of each.

        @param ncomp    number of principal components to keep
        @param spectra  array of SEDs
     """

     specPCA = sklPCA(ncomp)
     specPCA.fit(spectra)
     self.meanSpec = specPCA.mean_
     self.eigenspectra = specPCA.components_
     self.eigenvalue_coeffs = np.array(specPCA.transform(spectra))

     # Single-argument print with %-formatting is valid on both Python 2 and
     # Python 3 and emits the same text as the former print statements.
     print("Mean spectrum shape: %s" % (self.meanSpec.shape,))
     print("Eigenspectra shape: %s" % (self.eigenspectra.shape,))
     print("Eigenvalues shape: %s" % (self.eigenvalue_coeffs.shape,))
Esempio n. 8
0
    def _doPCA(self, ncomp, spectra):
        """PCA the SED set in array

           Sets ``self.meanSpec`` (the model's ``mean_``),
           ``self.eigenspectra`` (its ``components_``) and
           ``self.eigenvalue_coeffs`` (the spectra transformed by the fitted
           model), then prints the shape of each.

           @param ncomp    number of principal components to keep
           @param spectra  array of SEDs
        """

        specPCA = sklPCA(ncomp)
        specPCA.fit(spectra)
        self.meanSpec = specPCA.mean_
        self.eigenspectra = specPCA.components_
        self.eigenvalue_coeffs = np.array(specPCA.transform(spectra))

        # Single-argument print with %-formatting is valid on both Python 2
        # and Python 3 and emits the same text as the former print statements.
        print("Mean spectrum shape: %s" % (self.meanSpec.shape,))
        print("Eigenspectra shape: %s" % (self.eigenspectra.shape,))
        print("Eigenvalues shape: %s" % (self.eigenvalue_coeffs.shape,))
Esempio n. 9
0
    graph.annotate(fig, features_label, (.55, .95))

    return fig, AX


if __name__ == '__main__':

    from datavyz import ge

    # LOADING THE DATA
    from sklearn.datasets import load_breast_cancer
    data = load_breast_cancer()

    # PERFORMING PCA
    from sklearn.decomposition import PCA as sklPCA
    pca = sklPCA(n_components=4)
    pca.fit_transform(data['data'])

    # PLOT
    fig, AX = ge.components_plot(pca.components_)
    ge.savefig(fig, 'docs/components-plot.png')
    ge.show()

    from sklearn.datasets import load_iris
    dataset = load_iris()
    fig, ax = ge.parallel_plot(
        dataset['data'],
        SET_OF_LABELS=[
            'sepal length\n(cm)', 'sepal width\n(cm)', 'petal length\n(cm)',
            'petal width\n(cm)'
        ],
Esempio n. 10
0
def PCA(Xtrain, Xtest, n_components=2):
    """Fit a PCA on the training set and project both sets with it.

    The model is fitted on `Xtrain` only; `Xtest` is transformed with that
    same fitted model. Returns the pair ``(projected_train, projected_test)``.
    """
    reducer = sklPCA(n_components)
    # Tuple elements evaluate left to right, so the fit happens before the
    # test set is transformed.
    return reducer.fit_transform(Xtrain), reducer.transform(Xtest)
        features = nb.loc[i, 'ActMod_trqClth'].tolist()
        nbLetters = sum(features)
        # nb of appearance of symbols for engine speed
        features = features + nb.loc[i, 'Epm_nEng'].tolist()
        # nb of appearance of symbols for pedal position
        features = features + nb.loc[i, 'APP_r'].tolist()
        # duration of hoop
        features = (np.array(features) / nbLetters).tolist()
        features = features + [
            listValuesHoops[i].loc[listValuesHoops[i].index[-1], 'time'] -
            listValuesHoops[i].loc[listValuesHoops[i].index[0], 'time']
        ]
        featuresMatrix.append(features)
    return pd.DataFrame.from_records(featuresMatrix)


from sklearn.decomposition import PCA as sklPCA
from sklearn.preprocessing import MinMaxScaler
from mpl_toolkits.mplot3d import Axes3D

# Feature table returned by getFeatures (a DataFrame built from records).
test = getFeatures(listHoops, listValuesHoops)

# We run PCA
# NOTE(review): this rebinds the name `sklPCA` from the imported class to an
# instance, so the class is no longer reachable under that name afterwards.
sklPCA = sklPCA(n_components=8)
# Rescale the features with MinMaxScaler before the decomposition.
test2 = MinMaxScaler().fit_transform(test)
output = sklPCA.fit_transform(test2)

# 3-D scatter plot of the first three columns of the PCA output.
fig = plt.figure()
ax = fig.add_subplot(111, projection='3d')
ax.scatter(output[:, 0], output[:, 1], output[:, 2], c='b', marker='o')
plt.show()