def from_grid(cls, grid, **pca_kwargs):
    """
    Create an Emulator using PCA decomposition from a GridInterface.

    Parameters
    ----------
    grid : :class:`GridInterface` or str
        The grid interface to decompose. A string is handed to
        :class:`HDF5Interface` to open the grid first.
    pca_kwargs : dict, optional
        The keyword arguments to pass to PCA. By default, `n_components=0.99`
        and `svd_solver='full'`; any key given here overrides the default.

    Returns
    -------
    The object built by calling ``cls`` with the grid metadata
    (``grid_points``, ``param_names``, ``wl``), the PCA weights and
    eigenspectra, ``w_hat`` and the normalization terms.

    See Also
    --------
    sklearn.decomposition.PCA
    """
    # Load grid if a string is given
    if isinstance(grid, str):
        grid = HDF5Interface(grid)

    fluxes = np.array(list(grid.fluxes))

    # Normalize to an average of 1 to remove uninteresting correlation
    flux_scalar = fluxes.mean(1, keepdims=True)
    fluxes /= flux_scalar

    # Center and whiten
    flux_mean = fluxes.mean(0)
    fluxes -= flux_mean
    flux_std = fluxes.std(0)
    fluxes /= flux_std

    # Perform PCA using sklearn; caller-supplied kwargs win over the defaults
    default_pca_kwargs = dict(n_components=0.99, svd_solver="full")
    default_pca_kwargs.update(pca_kwargs)
    pca = PCA(**default_pca_kwargs)
    weights = pca.fit_transform(fluxes)
    eigenspectra = pca.components_

    # This is basically the mean square error of the reconstruction.
    # The summed ratio is a fraction in [0, 1], so the `%` presentation type
    # is used to scale it by 100 before printing it as a percentage.
    exp_var = pca.explained_variance_ratio_.sum()
    log.info(
        f"PCA fit {exp_var:.2%} of the variance with {pca.n_components_:d} components."
    )

    w_hat = get_w_hat(eigenspectra, fluxes)

    emulator = cls(
        grid_points=grid.grid_points,
        param_names=grid.param_names,
        wavelength=grid.wl,
        weights=weights,
        eigenspectra=eigenspectra,
        w_hat=w_hat,
        flux_mean=flux_mean,
        flux_std=flux_std,
        flux_scalar=flux_scalar,
    )
    return emulator
) args = parser.parse_args() import matplotlib.pyplot as plt import multiprocessing as mp import numpy as np import itertools import Starfish from Starfish import emulator from Starfish.grid_tools import HDF5Interface from Starfish.emulator import PCAGrid, Gprior, Glnprior, Emulator from Starfish.covariance import Sigma import os if args.create: myHDF5 = HDF5Interface() my_pca = PCAGrid.create(myHDF5) my_pca.write() if args.plot == "reconstruct": my_HDF5 = HDF5Interface() my_pca = PCAGrid.open() recon_fluxes = my_pca.reconstruct_all() # we need to apply the same normalization to the synthetic fluxes that we # used for the reconstruction fluxes = np.empty((my_pca.M, my_pca.npix)) for i, spec in enumerate(my_HDF5.fluxes): fluxes[i, :] = spec
def initialize(self, key):
    '''
    Bind this model to one chunk of data (a single echelle order).

    :param key: (spectrum_id, order_key)
    :param type: (int, int)

    The data arrays, logger, Chebyshev spectrum, interpolator, noise matrix
    and output directory are all set up here for that chunk.

    This method should only be called after all subprocess have been forked.
    '''
    self.id = key
    raw_spectrum_id, self.order_key = self.id
    # Make sure these are ints
    self.spectrum_id = int(raw_spectrum_id)

    self.instrument = Instruments[self.spectrum_id]
    self.dataSpectrum = DataSpectra[self.spectrum_id]

    # Pull out the arrays belonging to this order
    data = self.dataSpectrum
    order_key = self.order_key
    self.wl = data.wls[order_key]
    self.fl = data.fls[order_key]
    self.sigma = data.sigmas[order_key]
    self.ndata = len(self.wl)
    self.mask = data.masks[order_key]
    self.order = int(data.orders[order_key])

    # One logger per (class, order); verbosity follows self.debug
    logger_name = "{} {}".format(self.__class__.__name__, self.order)
    self.logger = logging.getLogger(logger_name)
    self.logger.setLevel(logging.DEBUG if self.debug else logging.INFO)
    self.logger.info(
        "Initializing model on Spectrum {}, order {}.".format(
            self.spectrum_id, self.order_key))

    self.npoly = Starfish.config["cheb_degree"]
    self.chebyshevSpectrum = ChebyshevSpectrum(
        self.dataSpectrum, self.order_key, npoly=self.npoly)

    # If a stored phi file exists for this spectrum/order, start from its
    # Chebyshev values
    phi_path = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
    if os.path.exists(phi_path):
        self.logger.debug("Loading stored Chebyshev parameters.")
        stored_phi = PhiParam.load(phi_path)
        self.chebyshevSpectrum.update(stored_phi.cheb)

    # Keeps the most recent residual spectra (at most 500), for averaging
    self.resid_deque = deque(maxlen=500)
    self.counter = 0

    self.interpolator = Interpolator(self.wl, HDF5Interface())
    self.flux = None  # Where the interpolator will store the flux
    self.wl_FFT = self.interpolator.wl

    # Frequency grid for the real FFT of wl_FFT, with the interface's dv as
    # the sample spacing
    freqs = np.fft.rfftfreq(len(self.wl_FFT), d=self.interpolator.interface.dv)
    self.ss = freqs
    self.ss[0] = 0.01  # junk so we don't get a divide by zero error

    # Diagonal matrix holding sigma**2 for each data point
    self.sigma_mat = self.sigma**2 * np.eye(self.ndata)

    self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

    # Update the outdir based upon id
    order_subdir = "{}/{}/".format(self.spectrum_id, self.order)
    self.noutdir = Starfish.routdir + order_subdir
I notice a difference in the flux level of starfish spectra compared to the manually loaded spectra. Investigating that here. """ import os import matplotlib.pyplot as plt import numpy as np from Starfish.grid_tools import HDF5Interface from astropy.io import fits from spectrum_overload import Spectrum import simulators myHDF5 = HDF5Interface() wl = myHDF5.wl params = [6100, 4.5, 0.0] flux = myHDF5.load_flux(np.array(params)) # Load direct phoenix spectra path = simulators.starfish_grid["raw_path"] phoenix = os.path.join( path, "Z-0.0", "lte{:05d}-{:0.2f}-0.0.PHOENIX-ACES-AGSS-COND-2011-HiRes.fits".format( params[0], params[1])) phoenix_wav = os.path.join(path, "WAVE_PHOENIX-ACES-AGSS-COND-2011.fits") print(phoenix)
# Parse the YAML configuration from the already-open file handle `f`.
# NOTE(review): yaml.load is called without an explicit Loader; if the config
# file is not fully trusted, yaml.safe_load is the safer call -- confirm.
cfg = yaml.load(f)
f.close()

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter as FSF
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import MultipleLocator
from Starfish.grid_tools import HDF5Interface
from Starfish.emulator import PCAGrid

# Build the PCA grid from the config and reuse its wavelength selection
pcagrid = PCAGrid.from_cfg(cfg)
ind = pcagrid.ind

#Make sure that we can get the same indices from the main grid.
grid = HDF5Interface(cfg["grid"], ranges=cfg["ranges"])
wl = grid.wl[ind]

# Unique values found in each of the three columns of the PCA grid parameters
temps = np.unique(pcagrid.gparams[:,0])
loggs = np.unique(pcagrid.gparams[:,1])
Zs = np.unique(pcagrid.gparams[:,2])
points = {"temp":temps, "logg":loggs, "Z":Zs}

# Output directory taken from the config
base = cfg['outdir']

# Plot the eigenspectra
for i,comp in enumerate(pcagrid.pcomps):
    print("plotting {}".format(i))
    # One new figure with a single axes per component
    fig = plt.figure(figsize=(8, 5))
    ax = fig.add_subplot(111)
def from_cfg(cls, cfg):
    '''
    Build a PCA decomposition of the spectral grid described by ``cfg``.

    The grid fluxes are normalized to unit mean, mean-subtracted and whitened
    per pixel, decomposed with PCA, and projected onto the first ``ncomp``
    components.

    :param cfg: dictionary containing the parameters. The keys read here are
        ``grid``, ``ranges``, ``test_index`` and ``ncomp``, plus the optional
        ``wl`` pair ``(low, high)`` that restricts the wavelength range.
        If ``test_index`` addresses a spectrum in the grid, that spectrum is
        left out of the decomposition; any other value (too large or
        negative) keeps every spectrum.
    :returns: an instance of ``cls`` whose ``ind`` attribute holds the
        wavelength selection that was applied to the grid.
    '''
    grid = HDF5Interface(cfg["grid"], ranges=cfg["ranges"])
    wl = grid.wl
    min_v = grid.wl_header["min_v"]

    if 'wl' in cfg:
        low, high = cfg['wl']
        #Sets the wavelength vector using a power of 2
        ind = determine_chunk_log(wl, low, high)
        wl = wl[ind]
    else:
        ind = np.ones_like(wl, dtype="bool")

    npix = len(wl)
    grid_points = grid.list_grid_points
    n_grid = len(grid_points)

    test_index = cfg['test_index']
    #If the index actually corresponds to a spectrum in the grid, we're dropping it out.
    #Otherwise, leave everything in. The explicit lower bound matters: a negative index
    #never matches a loop counter, so shrinking the arrays for it would overflow them.
    drop_test = 0 <= test_index < n_grid
    m = n_grid - 1 if drop_test else n_grid

    fluxes = np.empty((m, npix))
    z = 0
    for i, spec in enumerate(grid.fluxes):
        if i == test_index:
            continue
        fluxes[z, :] = spec[ind]
        z += 1

    #Normalize all of the fluxes to an average value of 1
    #In order to remove uninteresting correlations
    fluxes /= fluxes.mean(axis=1, keepdims=True)

    #Subtract the mean from all of the fluxes.
    flux_mean = np.average(fluxes, axis=0)
    fluxes -= flux_mean

    #"Whiten" each spectrum such that the variance for each wavelength is 1
    flux_std = np.std(fluxes, axis=0)
    fluxes /= flux_std

    decomposition = PCA()
    decomposition.fit(fluxes)
    components = decomposition.components_
    mean = decomposition.mean_
    print("Shape of PCA components {}".format(components.shape))

    if not np.allclose(mean, np.zeros_like(mean)):
        import sys
        sys.exit(
            "PCA mean is more than just numerical noise. Something's wrong!"
        )

    #Otherwise, the PCA mean is just numerical noise that we can ignore.

    ncomp = cfg['ncomp']
    print("Keeping only the first {} components".format(ncomp))
    pcomps = components[0:ncomp]

    gparams = np.empty((m, 3))
    z = 0
    for i, params in enumerate(grid_points):
        if i == test_index:
            continue
        gparams[z, :] = np.array(
            [params["temp"], params["logg"], params["Z"]])
        z += 1

    #Create w: w[i, j] is the projection of spectrum j onto component i
    w = np.dot(pcomps, fluxes.T)

    pca_grid = cls(wl, min_v, flux_mean, flux_std, pcomps, w, gparams)
    pca_grid.ind = ind
    return pca_grid
I notice a difference in the flux level of starfish spectra compared to the manually loaded spectra. Investigating that here. """ import os import Starfish import matplotlib.pyplot as plt import numpy as np from Starfish.grid_tools import HDF5Interface from astropy.io import fits import simulators myHDF5 = HDF5Interface( filename= "/home/jneal/Phd/Codes/companion_simulations/starfish_tests/libraries/PHOENIX_50k.hdf5" ) myHDF5_air = HDF5Interface( filename= "/home/jneal/Phd/Codes/companion_simulations/starfish_tests/libraries/PHOENIX_air.hdf5" ) myHDF5_norm_air = HDF5Interface( filename= "/home/jneal/Phd/Codes/companion_simulations/starfish_tests/libraries/PHOENIX_norm_air.hdf5" ) myHDF5_norm = HDF5Interface( filename= "/home/jneal/Phd/Codes/companion_simulations/starfish_tests/libraries/PHOENIX_norm.hdf5" ) wl = myHDF5.wl wl_air = myHDF5_air.wl
#!/usr/bin/env python
# Smoke test: read back the hdf5 grid file that I created
import numpy as np
from Starfish.grid_tools import HDF5Interface

# Grid point to load -- presumably [Teff, logg, Z]; confirm against the grid
grid_point = np.array([6100, 4.5, 0.0])

my_hdf5 = HDF5Interface()
wl = my_hdf5.wl
flux = my_hdf5.load_flux(grid_point)
def load_starfish_spectrum(params, limits=None, hdr=False, normalize=False,
                           area_scale=False, flux_rescale=False, wav_scale=True):
    """Load spectrum from hdf5 grid file.

    Parameters
    ----------
    params: list
        Model parameters [teff, logg, Z]
    limits: List[float, float], default=None
        Wavelength limits.
    hdr: bool
        Include the model header. Default False.
    normalize: bool
        Locally normalize the spectrum. Default False.
    area_scale: bool
        Multiply by stellar surface area pi*R**2 (towards Earth).
        Requires ``hdr=True``.
    flux_rescale: bool
        Convert from /cm to /nm by dividing by 1e7
    wav_scale: bool
        Multiply by wavelength to turn into [erg/s/cm^2]

    Returns
    -------
    spec: Spectrum
        The loaded spectrum as Spectrum object.

    Raises
    ------
    ValueError
        If ``area_scale`` is requested without ``hdr``; the emitting area is
        computed from the model header.
    """
    if area_scale and not hdr:
        # Fail fast: reject the impossible combination before opening the grid.
        raise ValueError("No header provided for stellar area scaling")

    my_hdf5 = HDF5Interface()
    my_hdf5.wl = my_hdf5.wl / 10  # Turn into Nanometer

    if hdr:
        flux, myhdr = my_hdf5.load_flux_hdr(np.array(params))
        spec = Spectrum(flux=flux, xaxis=my_hdf5.wl, header=myhdr)
    else:
        flux = my_hdf5.load_flux(np.array(params))
        spec = Spectrum(flux=flux, xaxis=my_hdf5.wl)

    if flux_rescale:
        spec = spec * 1e-7  # convert flux unit from /cm to /nm

    if area_scale:
        emitting_area = phoenix_area(spec.header)
        spec = spec * emitting_area
        spec.header["emit_area"] = (emitting_area, "pi*r^2")

    if wav_scale:
        # Convert into photon counts, (constants ignored)
        spec = spec * spec.xaxis

    if normalize:
        spec = spec_local_norm(spec, method="exponential")

    if limits is not None:
        if limits[0] > spec.xaxis[-1] or limits[-1] < spec.xaxis[0]:
            # The two literals are joined by implicit concatenation, so the
            # separating space has to be written explicitly.
            logging.warning(
                "Warning: The wavelength limits do not overlap the spectrum. "
                "There is no spectrum left... Check your wavelength, or limits."
            )
        spec.wav_select(*limits)

    return spec
def mock_hdf5_interface(mock_hdf5):
    """Yield an HDF5Interface constructed from ``mock_hdf5``.

    Written as a generator, presumably for use as a pytest fixture (the
    decorator is not visible here).
    """
    interface = HDF5Interface(mock_hdf5)
    yield interface
if args.plot:
    # The plots are made from the HDF5 grid, so stop early when it is missing
    import os

    hdf5_path = os.path.expandvars(Starfish.grid["hdf5_path"])
    grid_missing = not os.path.exists(hdf5_path)
    if grid_missing:
        print(
            "HDF5 file does not yet exist. Please run `grid.py create` first.")
        import sys

        sys.exit()

    import multiprocessing as mp

    import matplotlib.pyplot as plt

    from Starfish.grid_tools import HDF5Interface

    interface = HDF5Interface()
    # Pair every grid point with its flux array
    par_fluxes = zip(interface.grid_points, interface.fluxes)

    def plot(par_flux):
        """Plot one (parameters, flux) pair against the grid wavelengths and save it as a PNG."""
        par, flux = par_flux

        fig, ax = plt.subplots(nrows=1, figsize=(8, 6))
        ax.plot(interface.wl, flux)
        ax.set_xlabel(r"$\lambda$ [AA]")
        ax.set_ylabel(r"$f_\lambda$")

        # File name: one two-decimal field per entry in Starfish.parname,
        # joined by "="
        fmt = "=".join(["{:.2f}"] * len(Starfish.parname))
        name = fmt.format(*par)
        out_path = Starfish.config["plotdir"] + "g" + name + ".png"
        fig.savefig(out_path)
        plt.close("all")