Python HDF5Interface Examples, Starfish.grid_tools.HDF5Interface Python Examples

Example #1

0

Show file

File: emulator.py Project: spencerhurt/Starfish

    def from_grid(cls, grid, **pca_kwargs):
        """
        Create an Emulator using PCA decomposition from a GridInterface.

        Parameters
        ----------
        grid : :class:`GridInterface` or str
            The grid interface to decompose. If a string is given it is passed
            to :class:`HDF5Interface` to load the grid.
        pca_kwargs : dict, optional
            The keyword arguments to pass to PCA. By default, `n_components=0.99` and
            `svd_solver='full'`. Values given here override those defaults.

        Returns
        -------
        emulator
            An instance of ``cls`` built from the grid's points, parameter
            names and wavelengths together with the PCA weights, eigenspectra
            and the normalization terms computed here.

        See Also
        --------
        sklearn.decomposition.PCA
        """
        # Load grid if a string is given
        if isinstance(grid, str):
            grid = HDF5Interface(grid)

        fluxes = np.array(list(grid.fluxes))
        # Normalize to an average of 1 to remove uninteresting correlation
        flux_scalar = fluxes.mean(1, keepdims=True)
        fluxes /= flux_scalar
        # Center and whiten
        flux_mean = fluxes.mean(0)
        fluxes -= flux_mean
        flux_std = fluxes.std(0)
        fluxes /= flux_std

        # Perform PCA using sklearn; caller-supplied kwargs win over defaults
        default_pca_kwargs = dict(n_components=0.99, svd_solver="full")
        default_pca_kwargs.update(pca_kwargs)
        pca = PCA(**default_pca_kwargs)
        weights = pca.fit_transform(fluxes)
        eigenspectra = pca.components_

        # explained_variance_ratio_ holds fractions in [0, 1]; scale to a
        # percentage so the value matches the "%" in the log message.
        exp_var = pca.explained_variance_ratio_.sum()
        log.info(
            f"PCA fit {100 * exp_var:.2f}% of the variance with {pca.n_components_:d} components."
        )
        w_hat = get_w_hat(eigenspectra, fluxes)

        emulator = cls(
            grid_points=grid.grid_points,
            param_names=grid.param_names,
            wavelength=grid.wl,
            weights=weights,
            eigenspectra=eigenspectra,
            w_hat=w_hat,
            flux_mean=flux_mean,
            flux_std=flux_std,
            flux_scalar=flux_scalar,
        )
        return emulator

Example #2

0

Show file

File: pca.py Project: norrisryan/Starfish

)
args = parser.parse_args()

import matplotlib.pyplot as plt
import multiprocessing as mp
import numpy as np
import itertools
import Starfish
from Starfish import emulator
from Starfish.grid_tools import HDF5Interface
from Starfish.emulator import PCAGrid, Gprior, Glnprior, Emulator
from Starfish.covariance import Sigma
import os

# --create: build a PCAGrid from the HDF5 grid interface and call its
# write() method.
if args.create:
    myHDF5 = HDF5Interface()
    my_pca = PCAGrid.create(myHDF5)
    my_pca.write()

# --plot reconstruct: open an existing PCAGrid and gather both the
# reconstructed fluxes and the grid's own fluxes.
if args.plot == "reconstruct":
    my_HDF5 = HDF5Interface()
    my_pca = PCAGrid.open()

    recon_fluxes = my_pca.reconstruct_all()

    # we need to apply the same normalization to the synthetic fluxes that we
    # used for the reconstruction
    # Collect every grid spectrum into one (M, npix) array, one row per
    # spectrum, in the order the interface yields them.
    fluxes = np.empty((my_pca.M, my_pca.npix))
    for i, spec in enumerate(my_HDF5.fluxes):
        fluxes[i, :] = spec

Example #3

0

Show file

    def initialize(self, key):
        '''
        Bind this model to one chunk of data (a single echelle order).

        :param key: (spectrum_id, order_key)
        :type key: (int, int)

        This method should only be called after all subprocess have been forked.
        '''

        self.id = key
        raw_spectrum_id, self.order_key = self.id
        # The spectrum id is used as an index below, so coerce it to int
        self.spectrum_id = int(raw_spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        self.dataSpectrum = DataSpectra[self.spectrum_id]

        # Pull the per-order arrays out of the selected data spectrum
        data = self.dataSpectrum
        order_key = self.order_key
        self.wl = data.wls[order_key]
        self.fl = data.fls[order_key]
        self.sigma = data.sigmas[order_key]
        self.ndata = len(self.wl)
        self.mask = data.masks[order_key]
        self.order = int(data.orders[order_key])

        # One logger per (class, order); verbosity follows self.debug
        logger_name = "{} {}".format(self.__class__.__name__, self.order)
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(logging.DEBUG if self.debug else logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(
            self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(
            self.dataSpectrum, self.order_key, npoly=self.npoly)

        # If a stored parameter file exists, initialize the Chebyshev
        # coefficients from it
        phi_path = Starfish.specfmt.format(self.spectrum_id,
                                           self.order) + "phi.json"
        if os.path.exists(phi_path):
            self.logger.debug("Loading stored Chebyshev parameters.")
            stored_phi = PhiParam.load(phi_path)
            self.chebyshevSpectrum.update(stored_phi.cheb)

        # Bounded deque holding the most recent residual spectra, for averaging
        self.resid_deque = deque(maxlen=500)
        self.counter = 0

        self.interpolator = Interpolator(self.wl, HDF5Interface())
        self.flux = None  # Where the interpolator will store the flux

        self.wl_FFT = self.interpolator.wl

        # Frequency axis matching the FFT wavelength grid
        n_fft = len(self.wl_FFT)
        self.ss = np.fft.rfftfreq(n_fft, d=self.interpolator.interface.dv)
        # Placeholder for the zero-frequency bin so later divisions by ss
        # do not hit a divide-by-zero
        self.ss[0] = 0.01

        self.sigma_mat = self.sigma**2 * np.eye(self.ndata)

        self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

        # Output directory specific to this spectrum/order
        self.noutdir = Starfish.routdir + "{}/{}/".format(
            self.spectrum_id, self.order)

Example #4

0

Show file

File: compare_starfish_spectra.py Project: jason-neal/companion_simulations

I notice a difference in the flux level of starfish spectra compared to the manually loaded spectra.

Investigating that here.
"""
import os

import matplotlib.pyplot as plt
import numpy as np
from Starfish.grid_tools import HDF5Interface
from astropy.io import fits
from spectrum_overload import Spectrum

import simulators

# Open the Starfish HDF5 grid with its default settings and take its
# wavelength vector.
myHDF5 = HDF5Interface()
wl = myHDF5.wl

# Grid point to compare; presumably [Teff, logg, Z] -- confirm against the grid.
params = [6100, 4.5, 0.0]
flux = myHDF5.load_flux(np.array(params))

# Load direct phoenix spectra

path = simulators.starfish_grid["raw_path"]

# Build the path of the matching raw PHOENIX file.
# NOTE(review): only params[0] and params[1] are formatted into the name; the
# "Z-0.0" directory and the "-0.0" in the file name are hard-coded, so
# params[2] is not used here.
phoenix = os.path.join(
    path, "Z-0.0",
    "lte{:05d}-{:0.2f}-0.0.PHOENIX-ACES-AGSS-COND-2011-HiRes.fits".format(
        params[0], params[1]))
# Path of the PHOENIX wavelength file in the same raw directory.
phoenix_wav = os.path.join(path, "WAVE_PHOENIX-ACES-AGSS-COND-2011.fits")
print(phoenix)

Example #5

0

Show file

# Parse the configuration from the file object `f` (opened before this point).
# NOTE(review): yaml.load without an explicit Loader can construct arbitrary
# Python objects from the input and is rejected by recent PyYAML releases,
# which require a Loader argument. Consider yaml.safe_load(f) inside a
# `with open(...)` block instead of the manual close below.
cfg = yaml.load(f)
f.close()

import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter as FSF
from matplotlib.ticker import MaxNLocator
from matplotlib.ticker import MultipleLocator
from Starfish.grid_tools import HDF5Interface
from Starfish.emulator import PCAGrid

# Build the PCA grid from the configuration and keep the wavelength selection
# it used.
pcagrid = PCAGrid.from_cfg(cfg)
ind = pcagrid.ind

# Make sure that we can get the same indices from the main grid: open the grid
# with the same file/ranges and apply the same selection to its wavelengths.
grid = HDF5Interface(cfg["grid"], ranges=cfg["ranges"])
wl = grid.wl[ind]

# Unique values along each column of the PCA grid parameters; columns 0, 1 and
# 2 are read as temperature, logg and Z respectively.
temps = np.unique(pcagrid.gparams[:,0])
loggs = np.unique(pcagrid.gparams[:,1])
Zs = np.unique(pcagrid.gparams[:,2])
points = {"temp":temps, "logg":loggs, "Z":Zs}

# Output directory taken from the configuration.
base = cfg['outdir']
# Plot the eigenspectra

# One figure with a single axes per principal component.
for i,comp in enumerate(pcagrid.pcomps):
    print("plotting {}".format(i))

    fig = plt.figure(figsize=(8, 5))
    ax = fig.add_subplot(111)

Example #6

0

Show file

    def from_cfg(cls, cfg):
        '''
        Build a PCA grid from a configuration dictionary.

        The spectra of the HDF5 grid ``cfg["grid"]`` (restricted to
        ``cfg["ranges"]``) are optionally truncated in wavelength, normalized,
        mean-subtracted and whitened, then decomposed with PCA. The first
        ``cfg["ncomp"]`` components are kept and every spectrum is projected
        onto them.

        :param cfg: dictionary containing the parameters. Keys read here are
            ``"grid"``, ``"ranges"``, ``"test_index"``, ``"ncomp"`` and,
            optionally, ``"wl"`` (a ``(low, high)`` pair).
        :returns: an instance of ``cls`` with an extra attribute ``ind``, the
            wavelength selection that was applied to the grid.

        Exits the interpreter through ``sys.exit`` if the PCA mean is not
        numerically zero.
        '''

        grid = HDF5Interface(cfg["grid"], ranges=cfg["ranges"])
        wl = grid.wl
        min_v = grid.wl_header["min_v"]

        if 'wl' in cfg:
            low, high = cfg['wl']
            # Sets the wavelength vector using a power of 2
            ind = determine_chunk_log(wl, low, high)
            wl = wl[ind]
        else:
            ind = np.ones_like(wl, dtype="bool")

        npix = len(wl)
        m = len(grid.list_grid_points)
        test_index = cfg['test_index']

        # If the index actually corresponds to a spectrum in the grid, we're
        # dropping it out. Otherwise, leave every spectrum in. The lower bound
        # matters: a negative index never matches a spectrum below, so
        # shrinking m for it would leave an uninitialized row in the arrays.
        if 0 <= test_index < m:
            m -= 1

        fluxes = np.empty((m, npix))

        z = 0
        for i, spec in enumerate(grid.fluxes):
            if i == test_index:
                # Held-out test spectrum: not part of the decomposition
                continue
            fluxes[z, :] = spec[ind]
            z += 1

        # Normalize all of the fluxes to an average value of 1
        # in order to remove uninteresting correlations
        fluxes /= fluxes.mean(axis=1, keepdims=True)

        # Subtract the mean from all of the fluxes.
        flux_mean = np.average(fluxes, axis=0)
        fluxes -= flux_mean

        # "Whiten" each spectrum such that the variance for each wavelength is 1
        flux_std = np.std(fluxes, axis=0)
        fluxes /= flux_std

        decomposition = PCA()
        decomposition.fit(fluxes)
        components = decomposition.components_
        mean = decomposition.mean_
        print("Shape of PCA components {}".format(components.shape))

        # The fluxes were centered above, so anything beyond numerical noise
        # in the PCA mean indicates a problem.
        if not np.allclose(mean, np.zeros_like(mean)):
            import sys
            sys.exit(
                "PCA mean is more than just numerical noise. Something's wrong!"
            )

        ncomp = cfg['ncomp']
        print("Keeping only the first {} components".format(ncomp))
        pcomps = components[0:ncomp]

        # Grid parameters of the retained spectra, in the same order as fluxes
        gparams = np.empty((m, 3))
        z = 0
        for i, params in enumerate(grid.list_grid_points):
            if i == test_index:
                continue
            gparams[z, :] = (params["temp"], params["logg"], params["Z"])
            z += 1

        # Create w: w[i, j] is the projection of spectrum j onto component i.
        # A single matrix product replaces the per-element Python loop.
        w = np.dot(pcomps, fluxes.T)

        pca_grid = cls(wl, min_v, flux_mean, flux_std, pcomps, w, gparams)
        pca_grid.ind = ind
        return pca_grid

Example #7

0

Show file

File: compare_starfish_spectra.py Project: jason-neal/companion_simulations

I notice a difference in the flux level of starfish spectra compared to the manually loaded spectra.

Investigating that here.
"""
import os

import Starfish
import matplotlib.pyplot as plt
import numpy as np
from Starfish.grid_tools import HDF5Interface
from astropy.io import fits

import simulators

# Open four HDF5 grid libraries for comparison.
# NOTE(review): the paths are absolute and user-specific; the file names
# suggest 50k / air / normalized variants of the same PHOENIX grid -- confirm
# how each file was generated.
myHDF5 = HDF5Interface(
    filename=
    "/home/jneal/Phd/Codes/companion_simulations/starfish_tests/libraries/PHOENIX_50k.hdf5"
)
myHDF5_air = HDF5Interface(
    filename=
    "/home/jneal/Phd/Codes/companion_simulations/starfish_tests/libraries/PHOENIX_air.hdf5"
)
myHDF5_norm_air = HDF5Interface(
    filename=
    "/home/jneal/Phd/Codes/companion_simulations/starfish_tests/libraries/PHOENIX_norm_air.hdf5"
)
myHDF5_norm = HDF5Interface(
    filename=
    "/home/jneal/Phd/Codes/companion_simulations/starfish_tests/libraries/PHOENIX_norm.hdf5"
)
# Wavelength vectors of the first two grids.
wl = myHDF5.wl
wl_air = myHDF5_air.wl

Example #8

0

Show file

File: read_HDF5.py Project: jason-neal/companion_simulations

#!/usr/bin/env python
# Test reading hdf5 file that I created
import numpy as np

from Starfish.grid_tools import HDF5Interface

# Open the grid with the interface's default settings, then read its
# wavelength vector and the flux at a single grid point.
my_hdf5 = HDF5Interface()
wl = my_hdf5.wl
# Parameters are presumably [Teff, logg, Z] -- confirm against the grid.
flux = my_hdf5.load_flux(np.array([6100, 4.5, 0.0]))

Example #9

0

Show file

File: phoenix_utils.py Project: jason-neal/companion_simulations

def load_starfish_spectrum(params,
                           limits=None,
                           hdr=False,
                           normalize=False,
                           area_scale=False,
                           flux_rescale=False,
                           wav_scale=True):
    """Load spectrum from hdf5 grid file.

    The optional processing steps are applied in this order: flux rescale,
    area scale, wavelength scale, normalization, wavelength selection.

    Parameters
    ----------
    params: list
        Model parameters [teff, logg, Z]
    limits: List[float, float], default=None
        Wavelength limits, in the units of the returned x-axis (the grid
        wavelengths divided by 10).
    hdr: bool
       Include the model header. Default False.
    normalize: bool
        Locally normalize the spectrum. Default False.
    area_scale: bool
        Multiply by stellar surface area pi*R**2 (towards Earth).
        Requires ``hdr=True``.
    flux_rescale: bool
        Convert from /cm to /nm by dividing by 1e7
    wav_scale: bool
        Multiply by wavelength to turn into [erg/s/cm^2]

    Returns
    -------
    spec: Spectrum
        The loaded spectrum as Spectrum object.

    Raises
    ------
    ValueError
        If ``area_scale`` is requested without ``hdr``.
    """
    # Validate the option combination before opening the grid, so an invalid
    # call fails immediately instead of after loading a spectrum.
    if area_scale and not hdr:
        raise ValueError("No header provided for stellar area scaling")

    my_hdf5 = HDF5Interface()
    my_hdf5.wl = my_hdf5.wl / 10  # Turn into Nanometer

    grid_point = np.array(params)
    if hdr:
        flux, myhdr = my_hdf5.load_flux_hdr(grid_point)
        spec = Spectrum(flux=flux, xaxis=my_hdf5.wl, header=myhdr)
    else:
        flux = my_hdf5.load_flux(grid_point)
        spec = Spectrum(flux=flux, xaxis=my_hdf5.wl)

    if flux_rescale:
        spec = spec * 1e-7  # convert flux unit from /cm to /nm

    if area_scale:
        # hdr is guaranteed True here by the check at the top
        emitting_area = phoenix_area(spec.header)
        spec = spec * emitting_area
        spec.header["emit_area"] = (emitting_area, "pi*r^2")

    if wav_scale:
        # Convert into photon counts, (constants ignored)
        spec = spec * spec.xaxis

    if normalize:
        spec = spec_local_norm(spec, method="exponential")

    if limits is not None:
        if limits[0] > spec.xaxis[-1] or limits[-1] < spec.xaxis[0]:
            # The two literals are concatenated, so the separating space has
            # to be written explicitly.
            logging.warning(
                "Warning: The wavelength limits do not overlap the spectrum. "
                "There is no spectrum left... Check your wavelength, or limits."
            )
        spec.wav_select(*limits)

    return spec

Example #10

0

Show file

def mock_hdf5_interface(mock_hdf5):
    """Yield an ``HDF5Interface`` constructed from ``mock_hdf5``."""
    interface = HDF5Interface(mock_hdf5)
    yield interface

Example #11

0

Show file

if args.plot:

    # Check to make sure the file exists

    import os
    hdf5_path = os.path.expandvars(Starfish.grid["hdf5_path"])
    if not os.path.exists(hdf5_path):
        print(
            "HDF5 file does not yet exist. Please run `grid.py create` first.")
        import sys
        sys.exit()

    import multiprocessing as mp
    import matplotlib.pyplot as plt
    from Starfish.grid_tools import HDF5Interface
    interface = HDF5Interface()

    par_fluxes = zip(interface.grid_points, interface.fluxes)

    # Define the plotting function
    def plot(par_flux):
        par, flux = par_flux
        fig, ax = plt.subplots(nrows=1, figsize=(8, 6))
        ax.plot(interface.wl, flux)
        ax.set_xlabel(r"$\lambda$ [AA]")
        ax.set_ylabel(r"$f_\lambda$")
        fmt = "=".join(["{:.2f}" for i in range(len(Starfish.parname))])
        name = fmt.format(*[p for p in par])
        fig.savefig(Starfish.config["plotdir"] + "g" + name + ".png")

        plt.close("all")