Example #1
0
@pytest.fixture
def mock_trained_emulator(mock_emulator):
    # Re-use a cached trained emulator from disk when available; otherwise
    # train the mock emulator once and cache it for later runs.
    filename = os.path.join(test_base, "data", "emu.hdf5")
    if os.path.exists(filename):
        yield Emulator.load(filename)
    else:
        mock_emulator.train()
        mock_emulator.save(filename)
        yield mock_emulator
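As a rough usage sketch (the test name is made up; it only relies on the `_trained` flag already used in Examples #2 and #12), a test consuming this fixture might look like:

def test_trained_emulator(mock_trained_emulator):
    # The fixture yields a trained emulator whether it was loaded from the
    # cached HDF5 file or trained on the fly.
    assert mock_trained_emulator._trained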
Example #2
0
    def test_save_load(self, mock_emulator, tmpdir):
        init = mock_emulator.get_param_dict()
        filename = tmpdir.join("emu.hdf5")
        mock_emulator.save(filename)
        emulator = Emulator.load(filename)
        final = emulator.get_param_dict()
        assert init == final
        assert emulator._trained == mock_emulator._trained
Example #3
0
    def __init__(
        self,
        emulator: Union[str, Emulator],
        data: Union[str, Spectrum],
        grid_params: Sequence[float],
        max_deque_len: int = 100,
        name: str = "SpectrumModel",
        **params,
    ):
        if isinstance(emulator, str):
            emulator = Emulator.load(emulator)
        if isinstance(data, str):
            data = Spectrum.load(data)

        if len(data) > 1:
            raise ValueError(
                "Multiple orders detected in data, please use EchelleModel")

        self.emulator: Emulator = emulator
        self.data_name = data.name
        self.data = data[0]

        dv = calculate_dv(self.data.wave)
        self.min_dv_wave = create_log_lam_grid(dv, self.emulator.wl.min(),
                                               self.emulator.wl.max())["wl"]
        self.bulk_fluxes = resample(self.emulator.wl,
                                    self.emulator.bulk_fluxes,
                                    self.min_dv_wave)

        self.residuals = deque(maxlen=max_deque_len)

        # manually handle cheb coeffs to offset index by 1
        chebs = params.pop("cheb", [])
        cheb_idxs = [str(i) for i in range(1, len(chebs) + 1)]
        params["cheb"] = dict(zip(cheb_idxs, chebs))
        # load rest of params into FlatterDict
        self.params = FlatterDict(params)
        self.frozen = []
        self.name = name

        # Unpack the grid parameters
        self.n_grid_params = len(grid_params)
        self.grid_params = grid_params

        # None means "yet to be calculated", do not use NaN
        self._lnprob = None
        self._glob_cov = None
        self._loc_cov = None

        self.log = logging.getLogger(self.__class__.__name__)

        self.flux_scalar_func = LinearNDInterpolator(
            self.emulator.grid_points, self.emulator.flux_scalar)
Example #4
0
    def initialize(self, key):
        '''
        Initialize to the correct chunk of data (echelle order).

        :param key: (spectrum_id, order_key)
        :type key: (int, int)

        This method should only be called after all subprocesses have been forked.
        '''

        self.id = key
        spectrum_id, self.order_key = self.id
        # Make sure these are ints
        self.spectrum_id = int(spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        self.dataSpectrum = DataSpectra[self.spectrum_id]
        self.wl = self.dataSpectrum.wls[self.order_key]
        self.fl = self.dataSpectrum.fls[self.order_key]
        self.sigma = self.dataSpectrum.sigmas[self.order_key]
        self.ndata = len(self.wl)
        self.mask = self.dataSpectrum.masks[self.order_key]
        self.order = int(self.dataSpectrum.orders[self.order_key])

        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(self.dataSpectrum, self.order_key, npoly=self.npoly)

        # If the file exists, optionally initialize to the Chebyshev values
        fname = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
        if os.path.exists(fname):
            self.logger.debug("Loading stored Chebyshev parameters.")
            phi = PhiParam.load(fname)
            self.chebyshevSpectrum.update(phi.cheb)

        #self.resid_deque = deque(maxlen=500) #Deque that stores the last residual spectra, for averaging
        self.counter = 0

        self.emulator = Emulator.open()
        self.emulator.determine_chunk_log(self.wl)

        self.pca = self.emulator.pca

        self.wl_FFT = self.pca.wl

        # The raw eigenspectra and mean flux components
        self.EIGENSPECTRA = np.vstack((self.pca.flux_mean[np.newaxis,:], self.pca.flux_std[np.newaxis,:], self.pca.eigenspectra))

        self.ss = np.fft.rfftfreq(self.pca.npix, d=self.emulator.dv)
        self.ss[0] = 0.01 # junk so we don't get a divide by zero error

        # Holders to store the convolved and resampled eigenspectra
        self.eigenspectra = np.empty((self.pca.m, self.ndata))
        self.flux_mean = np.empty((self.ndata,))
        self.flux_std = np.empty((self.ndata,))
        self.flux_scalar = None

        self.sigma_mat = self.sigma**2 * np.eye(self.ndata)
        self.mus, self.C_GP, self.data_mat = None, None, None
        self.Omega = None

        self.lnprior = 0.0 # Modified and set by NuisanceSampler.lnprob

        # self.nregions = 0
        # self.exceptions = []

        # Update the outdir based upon id
        self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)
Example #5
0
    def initialize(self, key):
        '''
        Initialize to the correct chunk of data (echelle order).

        :param key: (spectrum_id, order_key)
        :type key: (int, int)

        This method should only be called after all subprocesses have been forked.
        '''

        self.id = key
        spectrum_id, self.order_key = self.id
        # Make sure these are ints
        self.spectrum_id = int(spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        self.dataSpectrum = DataSpectra[self.spectrum_id]
        self.wl = self.dataSpectrum.wls[self.order_key]
        self.fl = self.dataSpectrum.fls[self.order_key]
        self.sigma = self.dataSpectrum.sigmas[self.order_key]
        self.ndata = len(self.wl)
        self.mask = self.dataSpectrum.masks[self.order_key]
        self.order = int(self.dataSpectrum.orders[self.order_key])

        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(self.dataSpectrum, self.order_key, npoly=self.npoly)

        # If the file exists, optionally initialize to the Chebyshev values
        fname = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
        if os.path.exists(fname):
            self.logger.debug("Loading stored Chebyshev parameters.")
            phi = PhiParam.load(fname)
            self.chebyshevSpectrum.update(phi.cheb)

        self.resid_deque = deque(maxlen=500) #Deque that stores the last residual spectra, for averaging
        self.counter = 0

        self.emulator = Emulator.open()
        self.emulator.determine_chunk_log(self.wl)

        self.pca = self.emulator.pca

        self.wl_FFT = self.pca.wl

        # The raw eigenspectra and mean flux components
        self.EIGENSPECTRA = np.vstack((self.pca.flux_mean[np.newaxis,:], self.pca.flux_std[np.newaxis,:], self.pca.eigenspectra))

        self.ss = np.fft.rfftfreq(self.pca.npix, d=self.emulator.dv)
        self.ss[0] = 0.01 # junk so we don't get a divide by zero error

        # Holders to store the convolved and resampled eigenspectra
        self.eigenspectra = np.empty((self.pca.m, self.ndata))
        self.flux_mean = np.empty((self.ndata,))
        self.flux_std = np.empty((self.ndata,))

        self.sigma_mat = self.sigma**2 * np.eye(self.ndata)
        self.mus, self.C_GP, self.data_mat = None, None, None

        self.lnprior = 0.0 # Modified and set by NuisanceSampler.lnprob

        # self.nregions = 0
        # self.exceptions = []

        # Update the outdir based upon id
        self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)
Example #6
0
    elif args.params == "emcee":
        eparams = np.median(np.load("eparams_emcee.npy"), axis=0)
        print("Using emcee median")
    else:
        import sys
        sys.exit()

    # Print out the emulator parameters in an easily-readable format
    lambda_xi = eparams[0]
    hparams = eparams[1:].reshape((my_pca.m, -1))
    print("Emulator parameters are:")
    print("lambda_xi", lambda_xi)
    for row in hparams:
        print(row)

    emulator = Emulator(my_pca, eparams)

    # We will want to produce interpolated plots spanning each parameter dimension,
    # for each eigenspectrum.

    # Create a list of parameter blocks.
    # Go through each parameter, and create a list of all parameter combinations of
    # the other two parameters.
    unique_points = [
        np.unique(my_pca.gparams[:, i]) for i in range(len(Starfish.parname))
    ]
    blocks = []
    for ipar, pname in enumerate(Starfish.parname):
        upars = unique_points.copy()
        dim = upars.pop(ipar)
        ndim = len(dim)
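The snippet cuts off mid-loop. Purely as an illustrative sketch of the idea stated in the comments (fix one parameter dimension and enumerate every combination of the remaining ones), the blocks could be assembled along the following lines; this restates the truncated loop and is an assumption about intent, not the original code:

    # (sketch) one block per combination of the *other* parameters;
    # assumes `import itertools` at the top of the script
    for ipar, pname in enumerate(Starfish.parname):
        upars = unique_points.copy()
        dim = upars.pop(ipar)  # grid values along the varied parameter
        other_idxs = [i for i in range(len(Starfish.parname)) if i != ipar]
        for combo in itertools.product(*upars):
            block = np.empty((len(dim), len(Starfish.parname)))
            block[:, ipar] = dim
            for j, val in zip(other_idxs, combo):
                block[:, j] = val
            blocks.append(block)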
Example #7
0
wl = dataSpec.wls[0]

# Truncate these to our shorter range to make it faster
# ind = (wl > 5165.) & (wl < 5185.)
# wl = wl[ind]
#
fl = dataSpec.fls[0] #[ind]
sigma = dataSpec.sigmas[0] #[ind]
# mask = dataSpec.masks[0][ind]
ndata = len(wl)

print("ndata", ndata)
print("Data wl range", wl[0], wl[-1])

# Set up the emulator for this chunk
emulator = Emulator.open()
emulator.determine_chunk_log(wl)

pca = emulator.pca

wl_FFT_orig = pca.wl

print("FFT length", len(wl_FFT_orig))
print(wl_FFT_orig[0], wl_FFT_orig[-1])

# The raw eigenspectra and mean flux components
EIGENSPECTRA = np.vstack((pca.flux_mean[np.newaxis,:], pca.flux_std[np.newaxis,:], pca.eigenspectra))

ss = np.fft.rfftfreq(pca.npix, d=emulator.dv)
ss[0] = 0.01 # junk so we don't get a divide by zero error
Example #8
0
    def initialize(self, key):
        '''
        Initialize the OrderModel to the correct chunk of data (echelle order).

        :param key: (spectrum_id, order_id)
        :type key: (int, int)

        This should only be called after all subprocesses have been forked.
        '''

        self.id = key
        self.spectrum_id, self.order_id = self.id

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(
            self.spectrum_id, self.order_id))

        self.instrument = Instruments[self.spectrum_id]
        self.DataSpectrum = DataSpectra[self.spectrum_id]
        self.wl = self.DataSpectrum.wls[self.order_id]
        self.fl = self.DataSpectrum.fls[self.order_id]
        self.sigma = self.DataSpectrum.sigmas[self.order_id]
        self.npoints = len(self.wl)
        self.mask = self.DataSpectrum.masks[self.order_id]
        self.order = self.DataSpectrum.orders[self.order_id]

        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__,
                                                       self.order))
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        self.npoly = config["cheb_degree"]
        self.ChebyshevSpectrum = ChebyshevSpectrum(self.DataSpectrum,
                                                   self.order_id,
                                                   npoly=self.npoly)
        self.resid_deque = deque(
            maxlen=500
        )  #Deque that stores the last residual spectra, for averaging
        self.counter = 0

        self.Emulator = Emulator.open(
            config["PCA_path"])  # Returns mu and var vectors
        self.Emulator.determine_chunk_log(
            self.wl)  # Truncates the grid to this wl format, power of 2

        pg = self.Emulator.PCAGrid

        self.wl_FFT = pg.wl
        self.ncomp = pg.ncomp

        self.PCOMPS = np.vstack((pg.flux_mean[np.newaxis, :],
                                 pg.flux_std[np.newaxis, :], pg.pcomps))

        self.min_v = self.Emulator.min_v
        self.ss = np.fft.rfftfreq(len(self.wl_FFT), d=self.min_v)
        self.ss[0] = 0.01  # junk so we don't get a divide by zero error

        self.pcomps = np.empty((self.ncomp, self.npoints))
        self.flux_mean = np.empty((self.npoints, ))
        self.flux_std = np.empty((self.npoints, ))
        self.mus, self.vars = None, None
        self.C_GP = None
        self.data_mat = None

        self.sigma_matrix = self.sigma**2 * np.eye(self.npoints)

        self.prior = 0.0  # Modified and set by NuisanceSampler.lnprob
        self.nregions = 0
        self.exceptions = []

        #TODO: perturb
        #if args.perturb:
        #perturb(stellar_Starting, config["stellar_jump"], factor=args.perturb)

        cheb_MH_cov = float(config["cheb_jump"])**2 * np.ones((self.npoly, ))
        cheb_tuple = ("logc0", )
        # add in new coefficients
        for i in range(1, self.npoly):
            cheb_tuple += ("c{}".format(i), )
        # set starting position to 0
        cheb_Starting = {k: 0.0 for k in cheb_tuple}

        # Design cov starting
        cov_Starting = config['cov_params']
        cov_tuple = C.dictkeys_to_cov_global_tuple(cov_Starting)
        cov_MH_cov = np.array(
            [float(config["cov_jump"][key]) for key in cov_tuple])**2

        nuisance_MH_cov = np.diag(np.concatenate((cheb_MH_cov, cov_MH_cov)))
        nuisance_starting = {
            "cheb": cheb_Starting,
            "cov": cov_Starting,
            "regions": {}
        }

        # Because this initialization is happening on the subprocess, I think
        # the random state should be fine.

        # Update the outdir based upon id
        self.noutdir = outdir + "{}/{}/".format(self.spectrum_id, self.order)

        # Create the nuisance parameter sampler to run independently
        self.sampler = NuisanceSampler(OrderModel=self,
                                       starting_param_dict=nuisance_starting,
                                       cov=nuisance_MH_cov,
                                       debug=True,
                                       outdir=self.noutdir,
                                       order=self.order)
        self.p0 = self.sampler.p0

        # Update the nuisance parameters to the starting values so that we at
        # least have a self.data_mat
        self.logger.info(
            "Updating nuisance parameter data products to starting values.")
        self.update_nuisance(nuisance_starting)
        self.lnprob = None
Example #9
0


myInstrument = TRES()
#myHDF5Interface = HDF5Interface(config['HDF5_path'])

#Somehow parse the list parameters, vz and logOmega into secondary parameters.

stellar_Starting = config['stellar_params']
stellar_tuple = C.dictkeys_to_tuple(stellar_Starting)
#go for each item in stellar_tuple, and assign the appropriate covariance to it
#stellar_MH_cov = np.array([float(config["stellar_jump"][key]) for key in stellar_tuple])**2 \
#                 * np.identity(len(stellar_Starting))
stellar_MH_cov = np.array([float(config["stellar_jump"][key]) for key in stellar_tuple])**2

temulator = Emulator.open(config['PCA_path'])
#Call the emulator at the starting stellar parameters
pp = np.array([stellar_Starting["temp"], stellar_Starting["logg"], stellar_Starting["Z"]])
starting_Weights = temulator.draw_weights(pp)
stellar_Starting["weights"] = starting_Weights

weight_mu, weight_cov = temulator(pp)
weight_cov = weight_cov * config["frac_weight"]

stellar_MH_cov = np.concatenate((stellar_MH_cov, weight_cov))
stellar_MH_cov = stellar_MH_cov * np.identity(len(stellar_MH_cov))
print(len(stellar_MH_cov))

fix_logg = config.get("fix_logg", None)

#Updating specific correlations to speed mixing
Example #10
0
# Setup an HDF5 interface to allow much quicker reading and writing than
# loading FITS files over and over again.
from Starfish.grid_tools.instruments import SPEX
from Starfish.grid_tools import HDF5Creator
creator = HDF5Creator(grid,
                      "F_SPEX_grid.hdf5",
                      instrument=SPEX(),
                      wl_range=(0.9e4, np.inf),
                      ranges=ranges)
creator.process_grid()

#%%

# use the HDF5 interface to construct the spectral emulator
from Starfish.emulator import Emulator
emu = Emulator.from_grid("F_SPEX_grid.hdf5")
print(emu)

#%%

# train the emulator (optimize the GP hyperparameters over the PCA weights)
emu.train(options=dict(maxiter=1e5))
print(emu)

# check that it trained properly; the GPs should be smooth curves with small
# errors connecting the weights
from Starfish.emulator.plotting import plot_emulator
plot_emulator(emu)

#%%
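Once training converges, the emulator would normally be written to disk so later runs can skip retraining; a minimal sketch using the same save/load API that appears elsewhere in these examples (the filename is illustrative):

# persist the trained emulator, including its GP hyperparameters
emu.save("F_SPEX_emu.hdf5")

# reload it later without retraining
from Starfish.emulator import Emulator
emu = Emulator.load("F_SPEX_emu.hdf5")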
Example #11
0
wl = dataSpec.wls[0]

# Truncate these to our shorter range to make it faster
# ind = (wl > 5165.) & (wl < 5185.)
# wl = wl[ind]
#
fl = dataSpec.fls[0]  #[ind]
sigma = dataSpec.sigmas[0]  #[ind]
# mask = dataSpec.masks[0][ind]
ndata = len(wl)

print("ndata", ndata)
print("Data wl range", wl[0], wl[-1])

# Set up the emulator for this chunk
emulator = Emulator.open()
emulator.determine_chunk_log(wl)

pca = emulator.pca

wl_FFT_orig = pca.wl

print("FFT length", len(wl_FFT_orig))
print(wl_FFT_orig[0], wl_FFT_orig[-1])

# The raw eigenspectra and mean flux components
EIGENSPECTRA = np.vstack((pca.flux_mean[np.newaxis, :],
                          pca.flux_std[np.newaxis, :], pca.eigenspectra))

ss = np.fft.rfftfreq(pca.npix, d=emulator.dv)
ss[0] = 0.01  # junk so we don't get a divide by zero error
Example #12
0
    def test_creation_from_string(self, mock_hdf5):
        emu = Emulator.from_grid(mock_hdf5)
        assert emu._trained is False
        assert np.allclose(emu._grid_sep, [100, 0.5, 0.5])  # issue 134
Example #13
0
@pytest.fixture
def mock_emulator(mock_hdf5_interface):
    yield Emulator.from_grid(mock_hdf5_interface)
Example #14
0
    def initialize(self, key):
        '''
        Initialize the OrderModel to the correct chunk of data (echelle order).

        :param key: (spectrum_id, order_id)
        :type key: (int, int)

        This should only be called after all subprocesses have been forked.
        '''

        self.id = key
        self.spectrum_id, self.order_id = self.id

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_id))

        self.instrument = Instruments[self.spectrum_id]
        self.DataSpectrum = DataSpectra[self.spectrum_id]
        self.wl = self.DataSpectrum.wls[self.order_id]
        self.fl = self.DataSpectrum.fls[self.order_id]
        self.sigma = self.DataSpectrum.sigmas[self.order_id]
        self.npoints = len(self.wl)
        self.mask = self.DataSpectrum.masks[self.order_id]
        self.order = self.DataSpectrum.orders[self.order_id]

        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        self.npoly = config["cheb_degree"]
        self.ChebyshevSpectrum = ChebyshevSpectrum(self.DataSpectrum, self.order_id, npoly=self.npoly)
        self.resid_deque = deque(maxlen=500) #Deque that stores the last residual spectra, for averaging
        self.counter = 0

        self.Emulator = Emulator.open(config["PCA_path"]) # Returns mu and var vectors
        self.Emulator.determine_chunk_log(self.wl) # Truncates the grid to this wl format, power of 2

        pg = self.Emulator.PCAGrid

        self.wl_FFT = pg.wl
        self.ncomp = pg.ncomp

        self.PCOMPS = np.vstack((pg.flux_mean[np.newaxis,:], pg.flux_std[np.newaxis,:], pg.pcomps))

        self.min_v = self.Emulator.min_v
        self.ss = np.fft.rfftfreq(len(self.wl_FFT), d=self.min_v)
        self.ss[0] = 0.01 # junk so we don't get a divide by zero error

        self.pcomps = np.empty((self.ncomp, self.npoints))
        self.flux_mean = np.empty((self.npoints,))
        self.flux_std = np.empty((self.npoints,))
        self.mus, self.vars = None, None
        self.C_GP = None
        self.data_mat = None

        self.sigma_matrix = self.sigma**2 * np.eye(self.npoints)

        self.prior = 0.0 # Modified and set by NuisanceSampler.lnprob
        self.nregions = 0
        self.exceptions = []

        #TODO: perturb
        #if args.perturb:
            #perturb(stellar_Starting, config["stellar_jump"], factor=args.perturb)

        cheb_MH_cov = float(config["cheb_jump"])**2 * np.ones((self.npoly,))
        cheb_tuple = ("logc0",)
        # add in new coefficients
        for i in range(1, self.npoly):
            cheb_tuple += ("c{}".format(i),)
        # set starting position to 0
        cheb_Starting = {k:0.0 for k in cheb_tuple}

        # Design cov starting
        cov_Starting = config['cov_params']
        cov_tuple = C.dictkeys_to_cov_global_tuple(cov_Starting)
        cov_MH_cov = np.array([float(config["cov_jump"][key]) for key in cov_tuple])**2

        nuisance_MH_cov = np.diag(np.concatenate((cheb_MH_cov, cov_MH_cov)))
        nuisance_starting = {"cheb": cheb_Starting, "cov": cov_Starting, "regions":{}}

        # Because this initialization is happening on the subprocess, I think
        # the random state should be fine.

        # Update the outdir based upon id
        self.noutdir = outdir + "{}/{}/".format(self.spectrum_id, self.order)

        # Create the nuisance parameter sampler to run independently
        self.sampler = NuisanceSampler(OrderModel=self, starting_param_dict=nuisance_starting, cov=nuisance_MH_cov,
                                       debug=True, outdir=self.noutdir, order=self.order)
        self.p0 = self.sampler.p0

        # Update the nuisance parameters to the starting values so that we at
        # least have a self.data_mat
        self.logger.info("Updating nuisance parameter data products to starting values.")
        self.update_nuisance(nuisance_starting)
        self.lnprob = None
Example #15
0
# Setup an HDF5 interface to allow much quicker reading and writing than
# loading FITS files over and over again.
from Starfish.grid_tools.instruments import IGRINS_H_custom
from Starfish.grid_tools import HDF5Creator


creator = HDF5Creator(
    grid,
    "IGRINS_grid.hdf5",
    instrument=IGRINS_H_custom(),
    wl_range=(16600, 16700),
    ranges=ranges,
)
creator.process_grid()


#%%

from Starfish.emulator import Emulator

emu = Emulator.from_grid("IGRINS_grid.hdf5")
print(emu)

#%%
emu.train(options=dict(maxiter=1e5))
print(emu)

from Starfish.emulator.plotting import plot_emulator

plot_emulator(emu)

#%%

emu.save("IGRINS_emu.hdf5")