コード例 #1
0
ファイル: spec_data.py プロジェクト: jakevdp/spec_data
class CleanSpectra(object):
    """Load spectra from an HDF5 file, filter them, and rebuild them with WPCA.

    Parameters
    ----------
    min_wavelength, max_wavelength : number
        Inclusive wavelength bounds; ``load_data`` keeps only the columns
        whose wavelength lies inside this range.
    max_masked_fraction : float
        ``load_data`` keeps a row only when its fraction of zero-weight
        entries is strictly below this value. With the default of 1.0 only
        rows that are entirely zero-weight are discarded.
    """

    def __init__(self, min_wavelength=3500, max_wavelength=8300,
                 max_masked_fraction=1.0):
        self.min_wavelength = min_wavelength
        self.max_wavelength = max_wavelength
        self.max_masked_fraction = max_masked_fraction

    def load_data(self, h5file, selection=None):
        """Read wavelengths, spectra and weights from ``h5file``.

        The file must provide the datasets ``log_wavelengths`` (base-10
        logarithm of the wavelength grid), ``spectra`` and ``ivars``.
        NOTE(review): ``ivars`` is presumably inverse variance -- confirm.

        Parameters
        ----------
        h5file : path or file-like accepted by ``h5py.File``
        selection : slice, int or None
            Row selection. Anything that is not already a slice is wrapped
            in ``slice(selection)``: ``None`` selects every row and an
            integer ``n`` selects the first ``n`` rows.

        Returns
        -------
        self
            With ``wavelengths``, ``spectra`` and ``weights`` populated;
            ``weights`` holds the square root of the stored ``ivars``.
        """
        if not isinstance(selection, slice):
            selection = slice(selection)

        # The context manager closes the file even if a dataset is missing
        # or a read fails part-way through.
        with h5py.File(h5file, 'r') as datafile:
            wavelengths = 10 ** datafile['log_wavelengths'][:]
            mask = ((wavelengths >= self.min_wavelength) &
                    (wavelengths <= self.max_wavelength))
            spectra = datafile['spectra'][selection, mask]
            weights = datafile['ivars'][selection, mask]

        # remove rows with excessive missing data
        good_rows = (weights == 0).mean(1) < self.max_masked_fraction
        self.wavelengths = wavelengths[mask]
        self.spectra = spectra[good_rows]
        self.weights = weights[good_rows]
        self.weights **= 0.5
        return self

    def fit_wpca(self, n_components=200, regularization=False):
        """Fit a ``WPCA`` model to the loaded spectra and weights.

        The model is stored on ``self.wpca``; both arguments are forwarded
        unchanged to the ``WPCA`` constructor. Returns ``self``.
        """
        self.wpca = WPCA(n_components=n_components,
                         regularization=regularization)
        self.wpca.fit(self.spectra, weights=self.weights)
        return self

    def reconstruct(self, spectra=None, weights=None, p=2):
        """Blend observed spectra with their WPCA reconstruction.

        Parameters
        ----------
        spectra, weights : 2-D arrays or None
            Default to the arrays stored by ``load_data``.
        p : number
            The blending factor is ``abs(spectra * weights) ** (1 / p)``,
            normalised by its per-row maximum.

        Returns
        -------
        array
            ``SN * spectra + (1 - SN) * model``, where ``model`` is the
            output of ``self.wpca.reconstruct`` and ``SN`` is the
            normalised blending factor in [0, 1].
        """
        if spectra is None:
            spectra = self.spectra
        if weights is None:
            weights = self.weights

        new_spectra = self.wpca.reconstruct(spectra, weights=weights)
        SN = abs(spectra * weights) ** (1. / p)
        peak = SN.max(1, keepdims=True)
        # A row with no signal anywhere has peak == 0; dividing by it would
        # turn the whole row into NaN. Using 1 keeps SN at 0 for that row,
        # so the output falls back to the pure model reconstruction.
        peak[peak == 0] = 1
        SN /= peak
        return SN * spectra + (1 - SN) * new_spectra
コード例 #2
0
ファイル: test_wpca_common.py プロジェクト: jakevdp/wpca
def test_copy_data():
    """Check that ``copy_data`` decides whether ``WPCA.fit`` mutates its input.

    Fitting with ``copy_data=True`` must leave the data untouched, fitting
    with ``copy_data=False`` must alter it, and both fits must yield the
    same mean, components and explained variance.
    """
    rng = np.random.RandomState(0)
    data = rng.multivariate_normal([0, 0], [[12, 6], [6, 5]], size=100)
    weights = rng.rand(*data.shape)
    snapshot = data.copy()

    # copying fit: the caller's array must come back bit-for-bit identical
    copying = WPCA(copy_data=True)
    copying.fit(data, weights=weights)
    assert np.all(data == snapshot)

    # in-place fit: the caller's array is expected to be overwritten
    in_place = WPCA(copy_data=False)
    in_place.fit(data, weights=weights)
    assert not np.allclose(data, snapshot)

    # the fitted quantities must not depend on the copy_data setting
    for attr in ("mean_", "components_", "explained_variance_"):
        assert_allclose(getattr(copying, attr), getattr(in_place, attr))
コード例 #3
0
ファイル: spec_data.py プロジェクト: jakevdp/spec_data
 def fit_wpca(self, n_components=200, regularization=False):
     """Build a ``WPCA`` model, keep it on ``self.wpca`` and fit it.

     Both arguments are handed to the ``WPCA`` constructor unchanged; the
     model is fitted to ``self.spectra`` using ``self.weights``. Returns
     ``self`` so calls can be chained.
     """
     model = WPCA(n_components=n_components, regularization=regularization)
     # Store the model before fitting so the attribute exists even if
     # fitting raises.
     self.wpca = model
     model.fit(self.spectra, weights=self.weights)
     return self