/
spec_data.py
56 lines (45 loc) · 1.96 KB
/
spec_data.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import numpy as np
import h5py
from wpca import WPCA
class CleanSpectra(object):
    """Load spectra from an HDF5 file, fit a WPCA model and blend its output.

    Typical use chains the methods, since ``load_data`` and ``fit_wpca``
    both return ``self``::

        cleaned = CleanSpectra().load_data(path).fit_wpca().reconstruct()

    Parameters
    ----------
    min_wavelength, max_wavelength : float
        Inclusive bounds of the wavelength window kept by ``load_data``,
        expressed in the units of ``10 ** log_wavelengths`` from the
        input file.
    max_masked_fraction : float
        A spectrum is kept only when the fraction of its zero-weight
        pixels is strictly below this value.
    """

    def __init__(self, min_wavelength=3500, max_wavelength=8300,
                 max_masked_fraction=1.0):
        self.min_wavelength = min_wavelength
        self.max_wavelength = max_wavelength
        self.max_masked_fraction = max_masked_fraction

    def load_data(self, h5file, selection=None):
        """Read spectra and weights from ``h5file`` and filter them.

        The file must provide the datasets ``'log_wavelengths'``,
        ``'spectra'`` and ``'ivars'``.  Columns outside the configured
        wavelength window are dropped, as are rows whose zero-weight
        fraction reaches ``max_masked_fraction``.

        Parameters
        ----------
        h5file : path or file-like accepted by ``h5py.File``
        selection : slice, int or None
            Row selection.  Anything that is not already a ``slice`` is
            wrapped as ``slice(selection)``, so ``None`` selects every row
            and an integer ``n`` selects the first ``n`` rows.

        Returns
        -------
        self
            With ``wavelengths``, ``spectra`` and ``weights`` set;
            ``weights`` holds the square root of the stored ``'ivars'``.
        """
        if not isinstance(selection, slice):
            selection = slice(selection)

        # The context manager closes the file even when a read fails.
        with h5py.File(h5file, 'r') as datafile:
            wavelengths = 10 ** datafile['log_wavelengths'][:]
            in_window = ((wavelengths >= self.min_wavelength) &
                         (wavelengths <= self.max_wavelength))
            spectra = datafile['spectra'][selection, in_window]
            ivars = datafile['ivars'][selection, in_window]

        # Remove rows with excessive missing data (zero weight == masked).
        good_rows = (ivars == 0).mean(1) < self.max_masked_fraction

        self.wavelengths = wavelengths[in_window]
        self.spectra = spectra[good_rows]
        # Not in-place, so integer-typed weights are promoted to float
        # instead of raising a casting error.
        self.weights = ivars[good_rows] ** 0.5
        return self

    def fit_wpca(self, n_components=200, regularization=False):
        """Fit a ``WPCA`` model to the loaded spectra and weights.

        Requires ``load_data`` to have been called first.  The fitted model
        is stored on ``self.wpca``.

        Returns
        -------
        self
        """
        self.wpca = WPCA(n_components=n_components,
                         regularization=regularization)
        self.wpca.fit(self.spectra, weights=self.weights)
        return self

    def reconstruct(self, spectra=None, weights=None, p=2):
        """Blend observed spectra with their WPCA reconstruction.

        A per-pixel blend factor is computed as
        ``abs(spectra * weights) ** (1 / p)``, scaled so the largest value
        in each row is 1.  The result is
        ``factor * spectra + (1 - factor) * reconstruction``: pixels with a
        high factor keep the observed value, the others lean on the model.

        Parameters
        ----------
        spectra, weights : 2-D array or None
            Default to the arrays stored by ``load_data``.
        p : number
            Root applied to the blend factor; must be non-zero.

        Returns
        -------
        ndarray
            Blended spectra, same shape as ``spectra``.
        """
        if spectra is None:
            spectra = self.spectra
        if weights is None:
            weights = self.weights

        new_spectra = self.wpca.reconstruct(spectra, weights=weights)

        signal_to_noise = abs(spectra * weights) ** (1. / p)
        peak = signal_to_noise.max(1, keepdims=True)
        # A row with no signal at all would otherwise be 0 / 0 -> NaN;
        # dividing by 1 leaves its factor at 0, i.e. pure reconstruction.
        peak[peak == 0] = 1
        signal_to_noise /= peak

        return signal_to_noise * spectra + (1 - signal_to_noise) * new_spectra
def write_spectra_file(filename, spectra, wavelengths):
    """Write ``spectra`` and ``wavelengths`` to a new HDF5 file.

    The file is opened in ``'w'`` mode, so an existing file at ``filename``
    is overwritten.  Two datasets are created: ``'spectra'`` and
    ``'wavelengths'``.
    """
    # The context manager closes the file even if a dataset write fails.
    with h5py.File(filename, 'w') as h5f:
        h5f.create_dataset('spectra', data=spectra)
        h5f.create_dataset('wavelengths', data=wavelengths)