# Imports assumed for these classes. The Starfish module paths follow the
# package layout of this era and may need adjusting; `Instruments`,
# `DataSpectra`, `config`, `outdir`, and `NuisanceSampler` are globals
# expected to be defined elsewhere in the surrounding script before the
# subprocesses are forked.
import gc
import json
import logging
import os
from collections import deque
from itertools import chain
from operator import itemgetter

import numpy as np
from astropy.stats import sigma_clip
from scipy.interpolate import InterpolatedUnivariateSpline
from scipy.linalg import cho_factor, cho_solve
from scipy.special import j1

import Starfish
import Starfish.constants as C
from Starfish.emulator import Emulator
from Starfish.grid_tools import HDF5Interface, Interpolator
from Starfish.model import PhiParam
from Starfish.spectrum import ChebyshevSpectrum


# Variant of the Order class that scales the model by Omega * flux_scalar,
# using the emulator's absolute flux calibration.
class Order:

    def __init__(self, debug=False):
        '''
        This object contains all of the variables necessary for the partial
        lnprob calculation for one echelle order. It is designed to first be
        instantiated within the main process and then forked to other
        subprocesses. Once operating in the subprocess, the variables specific
        to the order are loaded with an `INIT` message call, which tells
        `self.initialize()` which key to initialize on.
        '''
        self.lnprob = -np.inf
        self.lnprob_last = -np.inf

        self.debug = debug

    def initialize(self, key):
        '''
        Initialize to the correct chunk of data (echelle order).

        :param key: (spectrum_id, order_key)
        :param type: (int, int)

        This method should only be called after all subprocesses have been forked.
        '''
        self.id = key
        spectrum_id, self.order_key = self.id
        # Make sure these are ints
        self.spectrum_id = int(spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        self.dataSpectrum = DataSpectra[self.spectrum_id]
        self.wl = self.dataSpectrum.wls[self.order_key]
        self.fl = self.dataSpectrum.fls[self.order_key]
        self.sigma = self.dataSpectrum.sigmas[self.order_key]
        self.ndata = len(self.wl)
        self.mask = self.dataSpectrum.masks[self.order_key]
        self.order = int(self.dataSpectrum.orders[self.order_key])

        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(self.dataSpectrum, self.order_key, npoly=self.npoly)

        # If the file exists, optionally initialize to the Chebyshev values
        fname = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
        if os.path.exists(fname):
            self.logger.debug("Loading stored Chebyshev parameters.")
            phi = PhiParam.load(fname)
            self.chebyshevSpectrum.update(phi.cheb)

        # self.resid_deque = deque(maxlen=500)  # Deque that stores the last residual spectra, for averaging
        self.counter = 0

        self.emulator = Emulator.open()
        self.emulator.determine_chunk_log(self.wl)

        self.pca = self.emulator.pca
        self.wl_FFT = self.pca.wl

        # The raw eigenspectra and mean flux components
        self.EIGENSPECTRA = np.vstack((self.pca.flux_mean[np.newaxis, :], self.pca.flux_std[np.newaxis, :], self.pca.eigenspectra))

        self.ss = np.fft.rfftfreq(self.pca.npix, d=self.emulator.dv)
        self.ss[0] = 0.01  # junk so we don't get a divide-by-zero error

        # Holders to store the convolved and resampled eigenspectra
        self.eigenspectra = np.empty((self.pca.m, self.ndata))
        self.flux_mean = np.empty((self.ndata,))
        self.flux_std = np.empty((self.ndata,))
        self.flux_scalar = None

        self.sigma_mat = self.sigma**2 * np.eye(self.ndata)
        self.mus, self.C_GP, self.data_mat = None, None, None
        self.Omega = None

        self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

        # self.nregions = 0
        # self.exceptions = []

        # Update the outdir based upon id
        self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)

    def evaluate(self):
        '''
        Return the lnprob using the current version of the C_GP matrix, data matrix,
        and other intermediate products.
        '''
        self.lnprob_last = self.lnprob

        X = (self.flux_std * np.eye(self.ndata)).dot(self.eigenspectra.T)

        part1 = X.dot(self.C_GP.dot(X.T))
        part2 = self.data_mat
        CC = part2 + part1

        try:
            factor, flag = cho_factor(CC)
        except np.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            self.CC_debugger(CC)
            np.save('X.npy', X)
            np.save('part1.npy', part1)
            np.save('part2.npy', part2)
            np.save('flux_mean.npy', self.flux_mean)
            np.save('flux_std.npy', self.flux_std)
            np.save('C_GP.npy', self.C_GP)
            raise

        try:
            model1 = (self.flux_mean + X.dot(self.mus))
            R = self.fl - model1

            logdet = np.sum(2 * np.log(np.diag(factor)))
            self.lnprob = -0.5 * (np.dot(R, cho_solve((factor, flag), R)) + logdet)

            self.logger.debug("Evaluating lnprob={}".format(self.lnprob))
            return self.lnprob

        # To give us some debugging information about what went wrong.
        except np.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            raise

    def CC_debugger(self, CC):
        '''
        Special debugging information for the covariance matrix decomposition.
        '''
        print('{:-^60}'.format('CC_debugger'))
        print("See https://github.com/iancze/Starfish/issues/26")
        np.save('CC_matrix.npy', CC)
        print("Covariance matrix at a glance:")
        if (CC.diagonal().min() < 0.0):
            print("- Negative entries on the diagonal:")
            print("\t- Check sigAmp: should be positive")
            print("\t- Check uncertainty estimates: should all be positive")
        elif np.any(np.isnan(CC.diagonal())):
            print("- Covariance matrix has a NaN value on the diagonal")
        else:
            if not np.allclose(CC, CC.T):
                print("- The covariance matrix is highly asymmetric")

            # Still might have an asymmetric matrix below the `allclose` threshold
            evals_CC, evecs_CC = np.linalg.eigh(CC)
            n_neg = (evals_CC < 0).sum()
            n_tot = len(evals_CC)
            print("- There are {} negative eigenvalues out of {}.".format(n_neg, n_tot))
            mark = lambda val: '>' if val < 0 else '.'

            print("Covariance matrix eigenvalues:")
            print(*["{: >6} {:{fill}>20.3e}".format(i, evals_CC[i], fill=mark(evals_CC[i])) for i in range(10)], sep='\n')
            print('{: >15}'.format('...'))
            print(*["{: >6} {:{fill}>20.3e}".format(n_tot - 10 + i, evals_CC[-10 + i], fill=mark(evals_CC[-10 + i])) for i in range(10)], sep='\n')
        print('{:-^60}'.format('-'))

    def update_Theta(self, p):
        '''
        Update the model to the current Theta parameters.

        :param p: parameters to update model to
        :type p: model.ThetaParam
        '''
        # dirty HACK to get fixed logg
        # Simply fixes the middle value to be 4.29
        # Check to see if it exists, as well
        fix_logg = Starfish.config.get("fix_logg", None)
        if fix_logg is not None:
            p.grid[1] = fix_logg
        # print("grid pars are", p.grid)

        self.logger.debug("Updating Theta parameters to {}".format(p))

        # Store the current accepted values before overwriting with new proposed values.
        self.flux_mean_last = self.flux_mean.copy()
        self.flux_std_last = self.flux_std.copy()
        self.eigenspectra_last = self.eigenspectra.copy()
        self.mus_last = self.mus
        self.C_GP_last = self.C_GP

        # Local, shifted copy of wavelengths
        wl_FFT = self.wl_FFT * np.sqrt((C.c_kms + p.vz) / (C.c_kms - p.vz))

        # If vsini is less than 0.2 km/s, we might run into issues with
        # the grid spacing. Therefore skip the convolution step if we have
        # values smaller than this.

        # FFT and convolve operations
        if p.vsini < 0.0:
            raise C.ModelError("vsini must be positive")
        elif p.vsini < 0.2:
            # Skip the vsini taper due to instrumental effects
            eigenspectra_full = self.EIGENSPECTRA.copy()
        else:
            FF = np.fft.rfft(self.EIGENSPECTRA, axis=1)

            # Determine the stellar broadening kernel
            ub = 2. * np.pi * p.vsini * self.ss
            sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub**2) + 3. * np.sin(ub) / (2 * ub**3)
            # Set the zeroth frequency to 1 separately (DC term)
            sb[0] = 1.

            # Institute the vsini taper
            FF_tap = FF * sb

            # Do the inverse FFT
            eigenspectra_full = np.fft.irfft(FF_tap, self.pca.npix, axis=1)

        # Spectrum resample operations
        if min(self.wl) < min(wl_FFT) or max(self.wl) > max(wl_FFT):
            raise RuntimeError("Data wl grid ({:.2f},{:.2f}) must fit within the range of wl_FFT ({:.2f},{:.2f})".format(min(self.wl), max(self.wl), min(wl_FFT), max(wl_FFT)))

        # Take the output from the FFT operation (eigenspectra_full), and stuff
        # the pieces into the respective data products
        for lres, hres in zip(chain([self.flux_mean, self.flux_std], self.eigenspectra), eigenspectra_full):
            interp = InterpolatedUnivariateSpline(wl_FFT, hres, k=5)
            lres[:] = interp(self.wl)
            del interp

        # Helps keep memory usage low; the numpy routine seems slow to clear
        # allocated memory with each iteration.
        gc.collect()

        # Adjust flux_mean and flux_std by Omega
        # Omega = 10**p.logOmega
        # self.flux_mean *= Omega
        # self.flux_std *= Omega

        # Now update the parameters from the emulator
        # If pars are outside the grid, Emulator will raise C.ModelError
        self.emulator.params = p.grid
        self.mus, self.C_GP = self.emulator.matrix
        self.flux_scalar = self.emulator.absolute_flux
        self.Omega = 10**p.logOmega
        self.flux_mean *= (self.Omega * self.flux_scalar)
        self.flux_std *= (self.Omega * self.flux_scalar)
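
# `evaluate()` above computes a zero-mean multivariate-normal log-likelihood
# of the residuals R under the covariance CC = X C_GP X^T + data_mat
# (dropping the constant -N/2 ln 2*pi term), via a Cholesky factorization
# rather than an explicit inverse. A minimal standalone sketch of that
# identity, with toy (made-up) inputs:
def _demo_cholesky_lnprob():
    rng = np.random.default_rng(0)
    A = rng.normal(size=(5, 5))
    CC = A @ A.T + 5 * np.eye(5)    # symmetric positive-definite covariance
    R = rng.normal(size=5)

    factor, flag = cho_factor(CC)
    # ln det CC = 2 * sum(ln diag(L)), read off the Cholesky factor
    logdet = np.sum(2 * np.log(np.diag(factor)))
    # -0.5 * (R^T CC^{-1} R + ln det CC)
    return -0.5 * (R @ cho_solve((factor, flag), R) + logdet)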
# Variant of the Order class with the full master/child message protocol
# (func_dict) and the Chebyshev correction applied inside evaluate().
class Order:

    def __init__(self, debug=False):
        '''
        This object contains all of the variables necessary for the partial
        lnprob calculation for one echelle order. It is designed to first be
        instantiated within the main process and then forked to other
        subprocesses. Once operating in the subprocess, the variables specific
        to the order are loaded with an `INIT` message call, which tells
        `self.initialize()` which key to initialize on.
        '''
        self.lnprob = -np.inf
        self.lnprob_last = -np.inf

        self.func_dict = {"INIT": self.initialize,
                          "DECIDE": self.decide_Theta,
                          "INST": self.instantiate,
                          "LNPROB": self.lnprob_Theta,
                          "GET_LNPROB": self.get_lnprob,
                          "FINISH": self.finish,
                          "SAVE": self.save,
                          "OPTIMIZE_CHEB": self.optimize_Cheb,
                          }

        self.debug = debug
        self.logger = logging.getLogger("{}".format(self.__class__.__name__))

    def initialize(self, key):
        '''
        Initialize to the correct chunk of data (echelle order).

        :param key: (spectrum_id, order_key)
        :param type: (int, int)

        This method should only be called after all subprocesses have been forked.
        '''
        self.id = key
        spectrum_id, self.order_key = self.id
        # Make sure these are ints
        self.spectrum_id = int(spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        self.dataSpectrum = DataSpectra[self.spectrum_id]
        self.wl = self.dataSpectrum.wls[self.order_key]
        self.fl = self.dataSpectrum.fls[self.order_key]
        self.sigma = self.dataSpectrum.sigmas[self.order_key]
        self.ndata = len(self.wl)
        self.mask = self.dataSpectrum.masks[self.order_key]
        self.order = int(self.dataSpectrum.orders[self.order_key])

        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(self.dataSpectrum, self.order_key, npoly=self.npoly)

        # If the file exists, optionally initialize to the Chebyshev values
        fname = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
        if os.path.exists(fname):
            self.logger.debug("Loading stored Chebyshev parameters.")
            phi = PhiParam.load(fname)
            self.chebyshevSpectrum.update(phi.cheb)

        self.resid_deque = deque(maxlen=500)  # Stores the last residual spectra, for averaging
        self.counter = 0

        self.emulator = Emulator.open()
        self.emulator.determine_chunk_log(self.wl)

        self.pca = self.emulator.pca
        self.wl_FFT = self.pca.wl

        # The raw eigenspectra and mean flux components
        self.EIGENSPECTRA = np.vstack((self.pca.flux_mean[np.newaxis, :], self.pca.flux_std[np.newaxis, :], self.pca.eigenspectra))

        self.ss = np.fft.rfftfreq(self.pca.npix, d=self.emulator.dv)
        self.ss[0] = 0.01  # junk so we don't get a divide-by-zero error

        # Holders to store the convolved and resampled eigenspectra
        self.eigenspectra = np.empty((self.pca.m, self.ndata))
        self.flux_mean = np.empty((self.ndata,))
        self.flux_std = np.empty((self.ndata,))

        self.sigma_mat = self.sigma**2 * np.eye(self.ndata)
        self.mus, self.C_GP, self.data_mat = None, None, None

        self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

        # self.nregions = 0
        # self.exceptions = []

        # Update the outdir based upon id
        self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)

    def instantiate(self, *args):
        '''
        If mixing Theta and Phi optimization/sampling, perform the sigma clipping
        operation to instantiate covariant regions to cover outliers.

        May involve creating a new NuisanceSampler.
        '''
        raise NotImplementedError

    def get_lnprob(self, *args):
        '''
        Return the *current* value of lnprob.

        Intended to be called from the master process to query the child
        processes for their current value of lnprob.
        '''
        return self.lnprob

    def lnprob_Theta(self, p):
        '''
        Update the model to the Theta parameters and then evaluate the lnprob.

        Intended to be called from the master process via the command "LNPROB".
        '''
        try:
            self.update_Theta(p)
            lnp = self.evaluate()  # Also sets self.lnprob to the new value
            return lnp
        except C.ModelError:
            self.logger.debug("ModelError in stellar parameters, sending back -np.inf {}".format(p))
            return -np.inf

    def evaluate(self):
        '''
        Return the lnprob using the current version of the C_GP matrix, data matrix,
        and other intermediate products.
        '''
        self.lnprob_last = self.lnprob

        X = (self.chebyshevSpectrum.k * self.flux_std * np.eye(self.ndata)).dot(self.eigenspectra.T)

        CC = X.dot(self.C_GP.dot(X.T)) + self.data_mat

        try:
            factor, flag = cho_factor(CC)
        except np.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            self.CC_debugger(CC)
            raise

        try:
            R = self.fl - self.chebyshevSpectrum.k * self.flux_mean - X.dot(self.mus)

            logdet = np.sum(2 * np.log(np.diag(factor)))
            self.lnprob = -0.5 * (np.dot(R, cho_solve((factor, flag), R)) + logdet)

            self.logger.debug("Evaluating lnprob={}".format(self.lnprob))
            return self.lnprob

        # To give us some debugging information about what went wrong.
        except np.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            raise

    def CC_debugger(self, CC):
        '''
        Special debugging information for the covariance matrix decomposition.
        '''
        print('{:-^60}'.format('CC_debugger'))
        print("See https://github.com/iancze/Starfish/issues/26")
        print("Covariance matrix at a glance:")
        if (CC.diagonal().min() < 0.0):
            print("- Negative entries on the diagonal:")
            print("\t- Check sigAmp: should be positive")
            print("\t- Check uncertainty estimates: should all be positive")
        elif np.any(np.isnan(CC.diagonal())):
            print("- Covariance matrix has a NaN value on the diagonal")
        else:
            if not np.allclose(CC, CC.T):
                print("- The covariance matrix is highly asymmetric")

            # Still might have an asymmetric matrix below the `allclose` threshold
            evals_CC, evecs_CC = np.linalg.eigh(CC)
            n_neg = (evals_CC < 0).sum()
            n_tot = len(evals_CC)
            print("- There are {} negative eigenvalues out of {}.".format(n_neg, n_tot))
            mark = lambda val: '>' if val < 0 else '.'

            print("Covariance matrix eigenvalues:")
            print(*["{: >6} {:{fill}>20.3e}".format(i, evals_CC[i], fill=mark(evals_CC[i])) for i in range(10)], sep='\n')
            print('{: >15}'.format('...'))
            print(*["{: >6} {:{fill}>20.3e}".format(n_tot - 10 + i, evals_CC[-10 + i], fill=mark(evals_CC[-10 + i])) for i in range(10)], sep='\n')
        print('{:-^60}'.format('-'))

    def update_Theta(self, p):
        '''
        Update the model to the current Theta parameters.

        :param p: parameters to update model to
        :type p: model.ThetaParam
        '''
        # dirty HACK to get fixed logg
        # Simply fixes the middle value to be 4.29
        # Check to see if it exists, as well
        fix_logg = Starfish.config.get("fix_logg", None)
        if fix_logg is not None:
            p.grid[1] = fix_logg
        print("grid pars are", p.grid)

        self.logger.debug("Updating Theta parameters to {}".format(p))

        # Store the current accepted values before overwriting with new proposed values.
        self.flux_mean_last = self.flux_mean.copy()
        self.flux_std_last = self.flux_std.copy()
        self.eigenspectra_last = self.eigenspectra.copy()
        self.mus_last = self.mus
        self.C_GP_last = self.C_GP

        # Local, shifted copy of wavelengths
        wl_FFT = self.wl_FFT * np.sqrt((C.c_kms + p.vz) / (C.c_kms - p.vz))

        # If vsini is less than 0.2 km/s, we might run into issues with
        # the grid spacing. Therefore skip the convolution step if we have
        # values smaller than this.

        # FFT and convolve operations
        if p.vsini < 0.0:
            raise C.ModelError("vsini must be positive")
        elif p.vsini < 0.2:
            # Skip the vsini taper due to instrumental effects
            eigenspectra_full = self.EIGENSPECTRA.copy()
        else:
            FF = np.fft.rfft(self.EIGENSPECTRA, axis=1)

            # Determine the stellar broadening kernel
            ub = 2. * np.pi * p.vsini * self.ss
            sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub**2) + 3. * np.sin(ub) / (2 * ub**3)
            # Set the zeroth frequency to 1 separately (DC term)
            sb[0] = 1.

            # Institute the vsini taper
            FF_tap = FF * sb

            # Do the inverse FFT
            eigenspectra_full = np.fft.irfft(FF_tap, self.pca.npix, axis=1)

        # Spectrum resample operations
        if min(self.wl) < min(wl_FFT) or max(self.wl) > max(wl_FFT):
            raise RuntimeError("Data wl grid ({:.2f},{:.2f}) must fit within the range of wl_FFT ({:.2f},{:.2f})".format(min(self.wl), max(self.wl), min(wl_FFT), max(wl_FFT)))

        # Take the output from the FFT operation (eigenspectra_full), and stuff
        # the pieces into the respective data products
        for lres, hres in zip(chain([self.flux_mean, self.flux_std], self.eigenspectra), eigenspectra_full):
            interp = InterpolatedUnivariateSpline(wl_FFT, hres, k=5)
            lres[:] = interp(self.wl)
            del interp

        # Helps keep memory usage low; the numpy routine seems slow to clear
        # allocated memory with each iteration.
        gc.collect()

        # Adjust flux_mean and flux_std by Omega
        Omega = 10**p.logOmega
        self.flux_mean *= Omega
        self.flux_std *= Omega

        # Now update the parameters from the emulator
        # If pars are outside the grid, Emulator will raise C.ModelError
        self.emulator.params = p.grid
        self.mus, self.C_GP = self.emulator.matrix

    def revert_Theta(self):
        '''
        Revert the status of the model from a rejected Theta proposal.
        '''
        self.logger.debug("Reverting Theta parameters")

        self.lnprob = self.lnprob_last

        self.flux_mean = self.flux_mean_last
        self.flux_std = self.flux_std_last
        self.eigenspectra = self.eigenspectra_last

        self.mus = self.mus_last
        self.C_GP = self.C_GP_last

    def decide_Theta(self, yes):
        '''
        Interpret the decision from the master process to either revert the
        Theta model (rejected parameters) or move on (accepted parameters).

        :param yes: if True, accept stellar parameters.
        :type yes: boolean
        '''
        if yes:
            # accept and move on
            self.logger.debug("Deciding to accept Theta parameters")
        else:
            # revert and move on
            self.logger.debug("Deciding to revert Theta parameters")
            self.revert_Theta()

        # Proceed with independent sampling
        self.independent_sample(1)

    def optimize_Cheb(self, *args):
        '''
        Keeping the current Theta parameters fixed and assuming white noise,
        optimize the Chebyshev parameters.
        '''
        if self.chebyshevSpectrum.fix_c0:
            p0 = np.zeros((self.npoly - 1))
            self.fix_c0 = True
        else:
            p0 = np.zeros((self.npoly))
            self.fix_c0 = False

        def fprob(p):
            self.chebyshevSpectrum.update(p)
            lnp = self.evaluate()
            print(self.order, p, lnp)
            if lnp == -np.inf:
                return 1e99
            else:
                return -lnp

        from scipy.optimize import fmin
        result = fmin(fprob, p0, maxiter=10000, maxfun=10000)
        print(self.order, result)

        # Due to a JSON bug, np.int64 type objects will get read twice and
        # cause this routine to fail. Therefore we have to be careful to
        # convert these to ints.
        phi = PhiParam(spectrum_id=int(self.spectrum_id), order=int(self.order), fix_c0=self.chebyshevSpectrum.fix_c0, cheb=result)
        phi.save()

    def update_Phi(self, p):
        '''
        Update the Phi parameters and data covariance matrix.

        :param params: large dictionary containing cheb, cov, and regions
        '''
        raise NotImplementedError

    def revert_Phi(self, *args):
        '''
        Revert all products from the nuisance parameters, including the data
        covariance matrix.
        '''
        self.logger.debug("Reverting Phi parameters")

        self.lnprob = self.lnprob_last
        self.chebyshevSpectrum.revert()
        self.data_mat = self.data_mat_last

    def clear_resid_deque(self):
        '''
        Clear the accumulated residual spectra.
        '''
        self.resid_deque.clear()

    def independent_sample(self, niter):
        '''
        Do the independent sampling specific to this echelle order, using the
        attached self.sampler (NuisanceSampler).

        :param niter: number of iterations to complete before returning to the
            master process.
        '''
        self.logger.debug("Beginning independent sampling on Phi parameters")

        if self.lnprob:
            # If we have a current value, pass it to the sampler
            self.p0, self.lnprob, state = self.sampler.run_mcmc(pos0=self.p0, N=niter, lnprob0=self.lnprob)
        else:
            # Otherwise, start from the beginning
            self.p0, self.lnprob, state = self.sampler.run_mcmc(pos0=self.p0, N=niter)

        self.logger.debug("Finished independent sampling on Phi parameters")
        # Don't return anything to the master process.

    def finish(self, *args):
        '''
        Wrap up the sampling and write the samples to disk.
        '''
        self.logger.debug("Finishing")

    def brain(self, conn):
        '''
        The infinite loop of the subprocess, which continues to listen for
        messages on the pipe.
        '''
        self.conn = conn
        alive = True
        while alive:
            # Keep listening for messages put on the Pipe
            alive = self.interpret()
            # Once self.interpret() returns `False`, this loop will die.
        self.conn.send("DEAD")

    def interpret(self):
        '''
        Interpret the messages being put into the Pipe, and do something with
        them. Messages are always sent in a 2-arg tuple (fname, arg).

        Right now we only expect one function and one argument, but this could
        be generalized to *args.
        '''
        # info("brain")
        fname, arg = self.conn.recv()  # Waits here to receive a new message
        self.logger.debug("{} received message {}".format(os.getpid(), (fname, arg)))

        func = self.func_dict.get(fname, False)
        if func:
            response = func(arg)
        else:
            self.logger.info("Given an unknown function {}, assuming kill signal.".format(fname))
            return False

        # Functions only return a response other than None when they want it
        # communicated back to the master process. Some commands sent to the
        # child processes do not require a response to the main process.
        if response:
            self.logger.debug("{} sending back {}".format(os.getpid(), response))
            self.conn.send(response)

        return True

    def save(self, *args):
        '''
        Using the current values for flux, write out the data, mean model, and
        mean residuals into a JSON.
        '''
        X = (self.chebyshevSpectrum.k * self.flux_std * np.eye(self.ndata)).dot(self.eigenspectra.T)
        model = self.chebyshevSpectrum.k * self.flux_mean + X.dot(self.mus)
        resid = self.fl - model

        my_dict = {"wl": self.wl.tolist(), "data": self.fl.tolist(), "model": model.tolist(), "resid": resid.tolist(), "sigma": self.sigma.tolist(), "spectrum_id": self.spectrum_id, "order": self.order}

        fname = Starfish.specfmt.format(self.spectrum_id, self.order)
        with open(fname + "spec.json", 'w') as f:
            json.dump(my_dict, f, indent=2, sort_keys=True)
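
# The `brain`/`interpret` loop above implies a simple (command, argument)
# message protocol over a multiprocessing Pipe. A minimal sketch of how a
# master process might drive one forked Order child; the command names match
# `func_dict` above, but the (0, 0) key and the `theta` placeholder are
# illustrative and assume the module globals (DataSpectra, Instruments, ...)
# were populated before forking.
def _demo_drive_order():
    from multiprocessing import Pipe, Process

    parent_conn, child_conn = Pipe()
    order = Order(debug=True)
    proc = Process(target=order.brain, args=(child_conn,))
    proc.start()

    parent_conn.send(("INIT", (0, 0)))     # attach to spectrum 0, order key 0
    theta = ...                            # a model.ThetaParam proposal, built elsewhere
    parent_conn.send(("LNPROB", theta))
    print("lnprob:", parent_conn.recv())   # lnprob_Theta sends its value back

    parent_conn.send(("DIE", None))        # any unknown command acts as the kill signal
    assert parent_conn.recv() == "DEAD"
    proc.join()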
# Variant of the Order class that draws model fluxes directly from a grid
# Interpolator instead of the PCA emulator.
class Order:

    def __init__(self, debug=False):
        '''
        This object contains all of the variables necessary for the partial
        lnprob calculation for one echelle order. It is designed to first be
        instantiated within the main process and then forked to other
        subprocesses. Once operating in the subprocess, the variables specific
        to the order are loaded with an `INIT` message call, which tells
        `self.initialize()` which key to initialize on.
        '''
        self.lnprob = -np.inf
        self.lnprob_last = -np.inf

        self.func_dict = {"INIT": self.initialize,
                          "DECIDE": self.decide_Theta,
                          "INST": self.instantiate,
                          "LNPROB": self.lnprob_Theta,
                          "GET_LNPROB": self.get_lnprob,
                          "FINISH": self.finish,
                          "SAVE": self.save,
                          "OPTIMIZE_CHEB": self.optimize_Cheb,
                          }

        self.debug = debug
        self.logger = logging.getLogger("{}".format(self.__class__.__name__))

    def initialize(self, key):
        '''
        Initialize to the correct chunk of data (echelle order).

        :param key: (spectrum_id, order_key)
        :param type: (int, int)

        This method should only be called after all subprocesses have been forked.
        '''
        self.id = key
        spectrum_id, self.order_key = self.id
        # Make sure these are ints
        self.spectrum_id = int(spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        self.dataSpectrum = DataSpectra[self.spectrum_id]
        self.wl = self.dataSpectrum.wls[self.order_key]
        self.fl = self.dataSpectrum.fls[self.order_key]
        self.sigma = self.dataSpectrum.sigmas[self.order_key]
        self.ndata = len(self.wl)
        self.mask = self.dataSpectrum.masks[self.order_key]
        self.order = int(self.dataSpectrum.orders[self.order_key])

        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(self.dataSpectrum, self.order_key, npoly=self.npoly)

        # If the file exists, optionally initialize to the Chebyshev values
        fname = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
        if os.path.exists(fname):
            self.logger.debug("Loading stored Chebyshev parameters.")
            phi = PhiParam.load(fname)
            self.chebyshevSpectrum.update(phi.cheb)

        self.resid_deque = deque(maxlen=500)  # Stores the last residual spectra, for averaging
        self.counter = 0

        self.interpolator = Interpolator(self.wl, HDF5Interface())
        self.flux = None  # Where the interpolator will store the flux

        self.wl_FFT = self.interpolator.wl

        self.ss = np.fft.rfftfreq(len(self.wl_FFT), d=self.interpolator.interface.dv)
        self.ss[0] = 0.01  # junk so we don't get a divide-by-zero error

        self.sigma_mat = self.sigma**2 * np.eye(self.ndata)

        self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

        # self.nregions = 0
        # self.exceptions = []

        # Update the outdir based upon id
        self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)

    def instantiate(self, *args):
        '''
        If mixing Theta and Phi optimization/sampling, perform the sigma clipping
        operation to instantiate covariant regions to cover outliers.

        May involve creating a new NuisanceSampler.
        '''
        raise NotImplementedError

    def get_lnprob(self, *args):
        '''
        Return the *current* value of lnprob.

        Intended to be called from the master process to query the child
        processes for their current value of lnprob.
        '''
        return self.lnprob

    def lnprob_Theta(self, p):
        '''
        Update the model to the Theta parameters and then evaluate the lnprob.

        Intended to be called from the master process via the command "LNPROB".
        '''
        try:
            self.update_Theta(p)
            lnp = self.evaluate()  # Also sets self.lnprob to the new value
            return lnp
        except (C.ModelError, C.InterpolationError):
            self.logger.debug("ModelError in stellar parameters, sending back -np.inf {}".format(p))
            return -np.inf

    def evaluate(self):
        '''
        Return the lnprob using the current version of the data matrix and
        other intermediate products.
        '''
        self.lnprob_last = self.lnprob

        CC = self.data_mat

        model = self.chebyshevSpectrum.k * self.flux

        try:
            factor, flag = cho_factor(CC)
            R = self.fl - model

            logdet = np.sum(2 * np.log(np.diag(factor)))
            self.lnprob = -0.5 * (np.dot(R, cho_solve((factor, flag), R)) + logdet)

            self.logger.debug("Evaluating lnprob={}".format(self.lnprob))
            return self.lnprob

        # To give us some debugging information about what went wrong.
        except np.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            raise

    def update_Theta(self, p):
        '''
        Update the model to the current Theta parameters.

        :param p: parameters to update model to
        :type p: model.ThetaParam
        '''
        # Dirty hack to get a fixed logg
        fix_logg = Starfish.config.get("fix_logg", None)
        if fix_logg is not None:
            p.grid[1] = fix_logg
        print("grid pars are", p.grid)

        self.logger.debug("Updating Theta parameters to {}".format(p))

        # Store the current accepted values before overwriting with new proposed values.
        self.flux_last = self.flux

        # Local, shifted copy of wavelengths
        wl_FFT = self.wl_FFT * np.sqrt((C.c_kms + p.vz) / (C.c_kms - p.vz))

        flux_raw = self.interpolator(p.grid)

        # If vsini is less than 0.2 km/s, we might run into issues with
        # the grid spacing. Therefore skip the convolution step if we have
        # values smaller than this.

        # FFT and convolve operations
        if p.vsini < 0.0:
            raise C.ModelError("vsini must be positive")
        elif p.vsini < 0.2:
            # Skip the vsini taper due to instrumental effects
            flux_taper = flux_raw
        else:
            FF = np.fft.rfft(flux_raw)

            # Determine the stellar broadening kernel
            ub = 2. * np.pi * p.vsini * self.ss
            sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub**2) + 3. * np.sin(ub) / (2 * ub**3)
            # Set the zeroth frequency to 1 separately (DC term)
            sb[0] = 1.

            # Institute the vsini taper
            FF_tap = FF * sb

            # Do the inverse FFT
            flux_taper = np.fft.irfft(FF_tap, len(self.wl_FFT))

        # Spectrum resample operations
        if min(self.wl) < min(wl_FFT) or max(self.wl) > max(wl_FFT):
            raise RuntimeError("Data wl grid ({:.2f},{:.2f}) must fit within the range of wl_FFT ({:.2f},{:.2f})".format(min(self.wl), max(self.wl), min(wl_FFT), max(wl_FFT)))

        # Take the output from the FFT operation and stuff it into the
        # respective data product
        interp = InterpolatedUnivariateSpline(wl_FFT, flux_taper, k=5)
        self.flux = interp(self.wl)
        del interp
        gc.collect()

        # Adjust the flux by Omega
        Omega = 10**p.logOmega
        self.flux *= Omega

    def revert_Theta(self):
        '''
        Revert the status of the model from a rejected Theta proposal.
        '''
        self.logger.debug("Reverting Theta parameters")

        self.lnprob = self.lnprob_last
        self.flux = self.flux_last

    def decide_Theta(self, yes):
        '''
        Interpret the decision from the master process to either revert the
        Theta model (rejected parameters) or move on (accepted parameters).

        :param yes: if True, accept stellar parameters.
        :type yes: boolean
        '''
        if yes:
            # accept and move on
            self.logger.debug("Deciding to accept Theta parameters")
        else:
            # revert and move on
            self.logger.debug("Deciding to revert Theta parameters")
            self.revert_Theta()

        # Proceed with independent sampling
        self.independent_sample(1)

    def optimize_Cheb(self, *args):
        '''
        Keeping the current Theta parameters fixed and assuming white noise,
        optimize the Chebyshev parameters.
        '''
        # self.fix_c0 = True if index == (len(DataSpectrum.wls) - 1) else False  # Fix the last c0
        # This is necessary if we want to update just a single order.
        if self.chebyshevSpectrum.fix_c0 and len(self.dataSpectrum.wls) > 1:
            p0 = np.zeros((self.npoly - 1))
        else:
            self.chebyshevSpectrum.fix_c0 = False
            p0 = np.zeros((self.npoly))

        def fprob(p):
            self.chebyshevSpectrum.update(p)
            lnp = self.evaluate()
            print(self.order, p, lnp)
            if lnp == -np.inf:
                return 1e99
            else:
                return -lnp

        from scipy.optimize import fmin
        result = fmin(fprob, p0, maxiter=10000, maxfun=10000)
        print(self.order, result)

        # Due to a JSON bug, np.int64 type objects will get read twice and
        # cause this routine to fail. Therefore we have to be careful to
        # convert these to ints.
        phi = PhiParam(spectrum_id=int(self.spectrum_id), order=int(self.order), fix_c0=self.chebyshevSpectrum.fix_c0, cheb=result)
        phi.save()

    def update_Phi(self, p):
        '''
        Update the Phi parameters and data covariance matrix.

        :param params: large dictionary containing cheb, cov, and regions
        '''
        raise NotImplementedError

    def revert_Phi(self, *args):
        '''
        Revert all products from the nuisance parameters, including the data
        covariance matrix.
        '''
        self.logger.debug("Reverting Phi parameters")

        self.lnprob = self.lnprob_last
        self.chebyshevSpectrum.revert()
        self.data_mat = self.data_mat_last

    def clear_resid_deque(self):
        '''
        Clear the accumulated residual spectra.
        '''
        self.resid_deque.clear()

    def independent_sample(self, niter):
        '''
        Do the independent sampling specific to this echelle order, using the
        attached self.sampler (NuisanceSampler).

        :param niter: number of iterations to complete before returning to the
            master process.
        '''
        self.logger.debug("Beginning independent sampling on Phi parameters")

        if self.lnprob:
            # If we have a current value, pass it to the sampler
            self.p0, self.lnprob, state = self.sampler.run_mcmc(pos0=self.p0, N=niter, lnprob0=self.lnprob)
        else:
            # Otherwise, start from the beginning
            self.p0, self.lnprob, state = self.sampler.run_mcmc(pos0=self.p0, N=niter)

        self.logger.debug("Finished independent sampling on Phi parameters")
        # Don't return anything to the master process.

    def finish(self, *args):
        '''
        Wrap up the sampling and write the samples to disk.
        '''
        self.logger.debug("Finishing")

    def brain(self, conn):
        '''
        The infinite loop of the subprocess, which continues to listen for
        messages on the pipe.
        '''
        self.conn = conn
        alive = True
        while alive:
            # Keep listening for messages put on the Pipe
            alive = self.interpret()
            # Once self.interpret() returns `False`, this loop will die.
        self.conn.send("DEAD")

    def interpret(self):
        '''
        Interpret the messages being put into the Pipe, and do something with
        them. Messages are always sent in a 2-arg tuple (fname, arg).

        Right now we only expect one function and one argument, but this could
        be generalized to *args.
        '''
        # info("brain")
        fname, arg = self.conn.recv()  # Waits here to receive a new message
        self.logger.debug("{} received message {}".format(os.getpid(), (fname, arg)))

        func = self.func_dict.get(fname, False)
        if func:
            response = func(arg)
        else:
            self.logger.info("Given an unknown function {}, assuming kill signal.".format(fname))
            return False

        # Functions only return a response other than None when they want it
        # communicated back to the master process. Some commands sent to the
        # child processes do not require a response to the main process.
        if response:
            self.logger.debug("{} sending back {}".format(os.getpid(), response))
            self.conn.send(response)

        return True

    def save(self, *args):
        '''
        Using the current values for flux, write out the data, mean model, and
        mean residuals into a JSON.
        '''
        resid = self.fl - self.flux

        my_dict = {"wl": self.wl.tolist(), "data": self.fl.tolist(), "model": self.flux.tolist(), "resid": resid.tolist(), "sigma": self.sigma.tolist(), "spectrum_id": self.spectrum_id, "order": self.order}

        fname = Starfish.specfmt.format(self.spectrum_id, self.order)
        with open(fname + "spec.json", 'w') as f:
            json.dump(my_dict, f, indent=2, sort_keys=True)
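
# The vsini taper used in update_Theta is the Fourier transform of a
# rotational broadening kernel, evaluated at the frequencies `ss` of the
# log-lambda grid. A standalone sketch of just that convolution step; the
# grid spacing, vsini, and toy spectrum here are made up for illustration.
def _demo_vsini_taper():
    dv = 0.5                   # km/s per pixel on a log-lambda grid (illustrative)
    npix = 4096
    flux = np.ones(npix)
    flux[npix // 2] = 0.0      # a single absorption "line"

    ss = np.fft.rfftfreq(npix, d=dv)
    ss[0] = 0.01               # junk so we don't divide by zero at the DC term

    vsini = 25.0               # km/s
    ub = 2. * np.pi * vsini * ss
    sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub**2) + 3. * np.sin(ub) / (2 * ub**3)
    sb[0] = 1.                 # reset the DC term by hand

    FF_tap = np.fft.rfft(flux) * sb
    return np.fft.irfft(FF_tap, npix)   # the line is now rotationally broadened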
# Script-level fragment: set up the emulator and Chebyshev spectrum once,
# then define a closure over them for the lnprob evaluation.

sigma_mat = sigma**2 * np.eye(ndata)
mus, C_GP, data_mat = None, None, None

# For each star

# In the config file, list the asteroseismic parameters as the starting grid
# parameters. Read this into a ThetaParam object.
grid = np.array(Starfish.config["Theta"]["grid"])

# Now update the parameters for the emulator
# If pars are outside the grid, Emulator will raise C.ModelError
emulator.params = grid
mus, C_GP = emulator.matrix

npoly = Starfish.config["cheb_degree"]
chebyshevSpectrum = ChebyshevSpectrum(dataSpec, 0, npoly=npoly)
chebyshevSpectrum.update(np.array(Starfish.config["chebs"]))

def lnprob(p):
    vz, vsini, logOmega = p[:3]
    cheb = p[3:]

    chebyshevSpectrum.update(cheb)

    # Local, shifted copy of wavelengths
    wl_FFT = wl_FFT_orig * np.sqrt((C.c_kms + vz) / (C.c_kms - vz))

    # Holders to store the convolved and resampled eigenspectra
    eigenspectra = np.empty((pca.m, ndata))
    flux_mean = np.empty((ndata,))
    flux_std = np.empty((ndata,))
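
# The emulator reconstructs a model spectrum from PCA weights:
# flux ~= flux_mean + flux_std * (eigenspectra^T . mus), which is the same
# construction as X.dot(self.mus) in the evaluate() methods above. A toy
# reconstruction with made-up arrays, just to show the shapes involved:
def _demo_pca_reconstruction():
    npix, m = 2048, 4                    # pixels and number of eigenspectra (illustrative)
    rng = np.random.default_rng(1)
    flux_mean = rng.uniform(0.9, 1.1, npix)
    flux_std = rng.uniform(0.01, 0.05, npix)
    eigenspectra = rng.normal(size=(m, npix))
    mus = rng.normal(size=m)             # weights the emulator returns for one Theta

    model = flux_mean + flux_std * eigenspectra.T.dot(mus)
    return model.shape                   # (2048,)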
class OrderModel: def __init__(self, debug=False): ''' This object contains all of the variables necessary for the partial lnprob calculation for one echelle order. It is designed to first be instantiated within the main processes and then forked to other subprocesses. Once operating in the subprocess, the variables specific to the order are loaded with an `INIT` message call, which tells which key to initialize on in the `self.initialize()`. ''' self.lnprob = -np.inf self.lnprob_last = -np.inf self.func_dict = { "INIT": self.initialize, "DECIDE": self.decide_stellar, "INST": self.instantiate, "LNPROB": self.stellar_lnprob, "GET_LNPROB": self.get_lnprob, "FINISH": self.finish } self.debug = debug def initialize(self, key): ''' Initialize the OrderModel to the correct chunk of data (echelle order). :param key: (spectrum_id, order_id) :param type: (int, int) This should only be called after all subprocess have been forked. ''' self.id = key self.spectrum_id, self.order_id = self.id self.logger.info("Initializing model on Spectrum {}, order {}.".format( self.spectrum_id, self.order_id)) self.instrument = Instruments[self.spectrum_id] self.DataSpectrum = DataSpectra[self.spectrum_id] self.wl = self.DataSpectrum.wls[self.order_id] self.fl = self.DataSpectrum.fls[self.order_id] self.sigma = self.DataSpectrum.sigmas[self.order_id] self.npoints = len(self.wl) self.mask = self.DataSpectrum.masks[self.order_id] self.order = self.DataSpectrum.orders[self.order_id] self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order)) if self.debug: self.logger.setLevel(logging.DEBUG) else: self.logger.setLevel(logging.INFO) self.npoly = config["cheb_degree"] self.ChebyshevSpectrum = ChebyshevSpectrum(self.DataSpectrum, self.order_id, npoly=self.npoly) self.resid_deque = deque( maxlen=500 ) #Deque that stores the last residual spectra, for averaging self.counter = 0 self.Emulator = Emulator.open( config["PCA_path"]) # Returns mu and var vectors self.Emulator.determine_chunk_log( self.wl) # Truncates the grid to this wl format, power of 2 pg = self.Emulator.PCAGrid self.wl_FFT = pg.wl self.ncomp = pg.ncomp self.PCOMPS = np.vstack((pg.flux_mean[np.newaxis, :], pg.flux_std[np.newaxis, :], pg.pcomps)) self.min_v = self.Emulator.min_v self.ss = np.fft.rfftfreq(len(self.wl_FFT), d=self.min_v) self.ss[0] = 0.01 # junk so we don't get a divide by zero error self.pcomps = np.empty((self.ncomp, self.npoints)) self.flux_mean = np.empty((self.npoints, )) self.flux_std = np.empty((self.npoints, )) self.mus, self.vars = None, None self.C_GP = None self.data_mat = None self.sigma_matrix = self.sigma**2 * np.eye(self.npoints) self.prior = 0.0 # Modified and set by NuisanceSampler.lnprob self.nregions = 0 self.exceptions = [] #TODO: perturb #if args.perturb: #perturb(stellar_Starting, config["stellar_jump"], factor=args.perturb) cheb_MH_cov = float(config["cheb_jump"])**2 * np.ones((self.npoly, )) cheb_tuple = ("logc0", ) # add in new coefficients for i in range(1, self.npoly): cheb_tuple += ("c{}".format(i), ) # set starting position to 0 cheb_Starting = {k: 0.0 for k in cheb_tuple} # Design cov starting cov_Starting = config['cov_params'] cov_tuple = C.dictkeys_to_cov_global_tuple(cov_Starting) cov_MH_cov = np.array( [float(config["cov_jump"][key]) for key in cov_tuple])**2 nuisance_MH_cov = np.diag(np.concatenate((cheb_MH_cov, cov_MH_cov))) nuisance_starting = { "cheb": cheb_Starting, "cov": cov_Starting, "regions": {} } # Because this initialization is happening on the subprocess, I think # the 
    def instantiate(self, *args):
        '''
        Clear the old NuisanceSampler, instantiate the regions using the stored
        residual spectra, and create a new NuisanceSampler.
        '''
        # threshold for sigma clipping
        sigma = config["sigma_clip"]

        # array that specifies if a pixel is already covered.
        # to start, it should be all False
        covered = np.zeros((self.npoints,), dtype='bool')

        # average all of the spectra in the deque together
        if len(self.resid_deque) == 0:
            raise RuntimeError("No residual spectra stored yet.")
        residual_array = np.array(self.resid_deque)
        residuals = np.average(residual_array, axis=0)

        # run the sigma_clip algorithm until converged, and we've identified the outliers
        filtered_data = sigma_clip(residuals, sig=sigma, iters=None)
        mask = filtered_data.mask
        wl = self.wl

        sigma0 = config['region_priors']['sigma0']
        logAmp = config["region_params"]["logAmp"]
        sigma = config["region_params"]["sigma"]  # reused: now the default region width

        # Sort in decreasing strength of residual
        self.nregions = 0
        regions = {}
        region_mus = {}

        for w, resid in sorted(zip(wl[mask], np.abs(residuals[mask])), key=itemgetter(1), reverse=True):
            if w in wl[covered]:
                continue
            # check to make sure the region is not *right* at the edge of the echelle order
            if w <= np.min(wl) or w >= np.max(wl):
                continue
            # instantiate region and update coverage
            # Default amp and sigma values
            regions[self.nregions] = {"logAmp": logAmp, "sigma": sigma, "mu": w}
            region_mus[self.nregions] = w  # for evaluating the mu prior
            self.nregions += 1

            # determine the stretch of wl covered by this new region
            ind = (wl >= (w - sigma0)) & (wl <= (w + sigma0))
            # update the covered regions
            covered = covered | ind

        # Take the current nuisance positions as a starting point, and add the regions
        starting_dict = self.sampler.params.copy()
        starting_dict["regions"] = regions

        region_mus = np.array([region_mus[i] for i in range(self.nregions)])

        # Set up the priors
        region_priors = config["region_priors"]
        region_priors.update({"mus": region_mus})
        prior_params = {"regions": region_priors}

        # Rebuild the proposal covariance, this time including the region parameters
        cheb_MH_cov = float(config["cheb_jump"])**2 * np.ones((self.npoly,))
        cov_MH_cov = np.array([float(config["cov_jump"][key]) for key in self.sampler.cov_tup])**2
        region_MH_cov = [float(config["region_jump"][key])**2 for key in C.cov_region_parameters]
        regions_MH_cov = np.array([region_MH_cov for i in range(self.nregions)]).flatten()

        nuisance_MH_cov = np.diag(np.concatenate((cheb_MH_cov, cov_MH_cov, regions_MH_cov)))

        print(starting_dict)
        print("cov shape {}".format(nuisance_MH_cov.shape))

        # Initialize a new sampler, replacing the old one
        self.sampler = NuisanceSampler(OrderModel=self, starting_param_dict=starting_dict,
                                       cov=nuisance_MH_cov, debug=True, outdir=self.noutdir,
                                       prior_params=prior_params, order=self.order)
        self.p0 = self.sampler.p0

        # Update the nuisance parameters to the starting values so that we at
        # least have a self.data_mat
        print("Updating nuisance parameter data products to starting values.")
        self.update_nuisance(starting_dict)
        self.lnprob = self.evaluate()

        # To speed up convergence, try just doing a bunch of nuisance runs before
        # going into the iteration pattern
        print("Doing nuisance burn-in for {} samples".format(config["nuisance_burn"]))
        self.independent_sample(config["nuisance_burn"])
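    # --- Example: how the greedy region instantiation works -------------------
    # A standalone sketch (synthetic residuals, not part of the module) of the
    # pattern used in instantiate() above: sigma-clip the averaged residuals,
    # then walk the flagged pixels in decreasing strength, claiming a +/-sigma0
    # window around each so that later, weaker outliers inside that window are
    # skipped. Note that modern astropy spells the keywords sigma=/maxiters=
    # rather than the sig=/iters= used above.
    #
    #   import numpy as np
    #   from astropy.stats import sigma_clip
    #   from operator import itemgetter
    #
    #   rng = np.random.default_rng(0)
    #   wl = np.linspace(5100., 5200., 500)
    #   residuals = rng.normal(0., 1., 500)
    #   residuals[120], residuals[122], residuals[340] = 9., 7., -8.  # injected outliers
    #
    #   mask = sigma_clip(residuals, sigma=4., maxiters=None).mask
    #   covered = np.zeros(len(wl), dtype='bool')
    #   sigma0 = 2.0   # coverage half-width, same units as wl
    #
    #   mus = []
    #   for w, r in sorted(zip(wl[mask], np.abs(residuals[mask])), key=itemgetter(1), reverse=True):
    #       if w in wl[covered]:
    #           continue
    #       mus.append(w)
    #       covered |= (wl >= w - sigma0) & (wl <= w + sigma0)
    #
    #   print(mus)   # pixels 120 and 340 win; 122 is absorbed by 120's window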
to starting values.") self.update_nuisance(starting_dict) self.lnprob = self.evaluate() # To speed up convergence, try just doing a bunch of nuisance runs before # going into the iteration pattern print("Doing nuisance burn-in for {} samples".format( config["nuisance_burn"])) self.independent_sample(config["nuisance_burn"]) def get_lnprob(self, *args): ''' Return the *current* value of lnprob. Intended to be called from the master process (StellarSampler.sample), to query the child processes for their current value of lnprob. ''' return self.lnprob def stellar_lnprob(self, params): ''' Update the model to the parameters and then evaluate the lnprob. Intended to be called from the master process via the command "LNPROB". ''' try: self.update_stellar(params) lnp = self.evaluate() # Also sets self.lnprob to new value return lnp except C.ModelError: self.logger.debug( "ModelError in stellar parameters, sending back -np.inf {}". format(params)) return -np.inf def evaluate(self): ''' Return the lnprob using the current version of the DataCovariance matrix and other intermediate products. ''' self.lnprob_last = self.lnprob X = (self.ChebyshevSpectrum.k * self.flux_std * np.eye(self.npoints)).dot(self.pcomps.T) CC = X.dot(self.C_GP.dot(X.T)) + self.data_mat R = self.fl - self.ChebyshevSpectrum.k * self.flux_mean - X.dot( self.mus) try: factor, flag = cho_factor(CC) except np.linalg.LinAlgError as e: self.logger.debug("self.sampler.params are {}".format( self.sampler.params)) raise C.ModelError("Can't Cholesky factor {}".format(e)) logdet = np.sum(2 * np.log((np.diag(factor)))) self.lnprob = -0.5 * (np.dot(R, cho_solve( (factor, flag), R)) + logdet) + self.prior if self.counter % 100 == 0: self.resid_deque.append(R) self.counter += 1 return self.lnprob def revert_stellar(self): ''' Revert the status of the model from a rejected stellar proposal. ''' self.logger.debug("Reverting stellar parameters") self.lnprob = self.lnprob_last self.flux_mean = self.flux_mean_last self.flux_std = self.flux_std_last self.pcomps = self.pcomps_last self.mus, self.vars = self.mus_last, self.vars_last self.C_GP = self.C_GP_last def update_stellar(self, params): ''' Update the model to the current stellar parameters. ''' self.logger.debug("Updating stellar parameters to {}".format(params)) # Store the current accepted values before overwriting with new proposed values. self.flux_mean_last = self.flux_mean self.flux_std_last = self.flux_std self.pcomps_last = self.pcomps self.mus_last, self.vars_last = self.mus, self.vars self.C_GP_last = self.C_GP #TODO: Possible speedups: # 1. Store the PCOMPS pre-FFT'd # Shift the velocity vz = params["vz"] # Local, shifted copy wl_FFT = self.wl_FFT * np.sqrt((C.c_kms + vz) / (C.c_kms - vz)) # FFT and convolve operations vsini = params["vsini"] if vsini < 0.2: raise C.ModelError("vsini must be positive") FF = np.fft.rfft(self.PCOMPS, axis=1) # Determine the stellar broadening kernel ub = 2. * np.pi * vsini * self.ss sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub**2) + 3. * np.sin(ub) / ( 2 * ub**3) # set zeroth frequency to 1 separately (DC term) sb[0] = 1. 
    def update_stellar(self, params):
        '''
        Update the model to the current stellar parameters.
        '''
        self.logger.debug("Updating stellar parameters to {}".format(params))

        # Store the current accepted values before overwriting with new proposed values.
        self.flux_mean_last = self.flux_mean
        self.flux_std_last = self.flux_std
        self.pcomps_last = self.pcomps
        self.mus_last, self.vars_last = self.mus, self.vars
        self.C_GP_last = self.C_GP

        #TODO: Possible speedups:
        # 1. Store the PCOMPS pre-FFT'd

        # Shift the velocity
        vz = params["vz"]
        # Local, shifted copy
        wl_FFT = self.wl_FFT * np.sqrt((C.c_kms + vz) / (C.c_kms - vz))

        # FFT and convolve operations
        vsini = params["vsini"]

        if vsini < 0.2:
            # Below ~0.2 km/s the kernel is narrower than the grid spacing.
            raise C.ModelError("vsini must be at least 0.2 km/s")

        FF = np.fft.rfft(self.PCOMPS, axis=1)

        # Determine the stellar broadening kernel
        ub = 2. * np.pi * vsini * self.ss
        sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub**2) + 3. * np.sin(ub) / (2 * ub**3)
        # set the zeroth frequency to 1 separately (DC term)
        sb[0] = 1.

        # institute velocity and instrumental taper
        FF_tap = FF * sb

        # do ifft
        pcomps_full = np.fft.irfft(FF_tap, len(wl_FFT), axis=1)

        # Spectrum resample operations
        if min(self.wl) < min(wl_FFT) or max(self.wl) > max(wl_FFT):
            raise RuntimeError("Data wl grid ({:.2f},{:.2f}) must fit within the range of "
                               "wl_FFT ({:.2f},{:.2f})".format(min(self.wl), max(self.wl),
                                                               min(wl_FFT), max(wl_FFT)))

        # Take the output from the FFT operation (pcomps_full), and stuff it
        # into the respective data products
        for lres, hres in zip(chain([self.flux_mean, self.flux_std], self.pcomps), pcomps_full):
            interp = InterpolatedUnivariateSpline(wl_FFT, hres, k=5)
            lres[:] = interp(self.wl)
            del interp

        gc.collect()

        # Adjust flux_mean and flux_std by Omega
        Omega = 10**params["logOmega"]
        self.flux_mean *= Omega
        self.flux_std *= Omega

        # Now update the parameters from the emulator
        # If pars are outside the grid, Emulator will raise C.ModelError
        pars = np.array([params["temp"], params["logg"], params["Z"]])
        self.mus, self.vars = self.Emulator(pars)
        self.C_GP = self.vars * np.eye(self.ncomp)

    def decide_stellar(self, yes):
        '''
        Interpret the decision from the master process to either revert the
        stellar model (rejected parameters) or move on (accepted parameters).
        '''
        if yes:
            # accept and move on
            self.logger.debug("Deciding to accept stellar parameters")
        else:
            # revert and move on
            self.logger.debug("Deciding to revert stellar parameters")
            self.revert_stellar()

        # Proceed with independent sampling
        self.independent_sample(1)

    def update_nuisance(self, params):
        '''
        Update the nuisance parameters and data covariance matrix.

        :param params: large dictionary containing cheb, cov, and regions
        '''
        self.logger.debug("Updating nuisance parameters to {}".format(params))

        # Read off the Chebyshev parameters and update
        self.ChebyshevSpectrum.update(params["cheb"])

        # Create the full data covariance matrix.
        l = params["cov"]["l"]
        sigAmp = params["cov"]["sigAmp"]

        # Check to make sure the global covariance parameters make sense
        if sigAmp < 0.1:
            raise C.ModelError("sigAmp shouldn't be lower than 0.1, something is wrong.")

        max_r = 6.0 * l  # [km/s]

        # Check all regions, take the max
        if self.nregions > 0:
            regions = params["regions"]
            keys = sorted(regions)
            sigmas = np.array([regions[key]["sigma"] for key in keys])  # km/s
            #mus = np.array([regions[key]["mu"] for key in keys])
            max_reg = 4.0 * np.max(sigmas)
            # If this is a larger distance than the global length, replace it
            max_r = max_reg if max_reg > max_r else max_r
            #print("Max_r now set by regions {}".format(max_r))

        # Create a partial function which returns the proper element.
        k_func = make_k_func(params)

        # Store the previous data matrix in case we want to revert later
        self.data_mat_last = self.data_mat
        self.data_mat = get_dense_C(self.wl, k_func=k_func, max_r=max_r) + sigAmp * self.sigma_matrix

    def revert_nuisance(self, *args):
        '''
        Revert all products from the nuisance parameters, including the data
        covariance matrix.
        '''
        self.logger.debug("Reverting nuisance parameters")

        self.lnprob = self.lnprob_last

        self.ChebyshevSpectrum.revert()
        self.data_mat = self.data_mat_last

    def clear_resid_deque(self):
        '''
        Clear the accumulated residual spectra.
        '''
        self.resid_deque.clear()
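    # --- Example: the rotational broadening taper -----------------------------
    # A standalone sketch (hypothetical grid size and spacing, not part of the
    # module) of the kernel applied to FF in update_stellar(): the Fourier
    # transform of a rotational broadening profile, evaluated at frequencies
    # ss for a given vsini. As ub -> 0 the expression tends to 1, which is why
    # the code sets the DC term explicitly instead of dividing 0 by 0.
    #
    #   import numpy as np
    #   from scipy.special import j1
    #
    #   npix, dv = 4096, 2.0            # hypothetical grid size, spacing [km/s]
    #   ss = np.fft.rfftfreq(npix, d=dv)
    #   ss[0] = 0.01                    # same divide-by-zero guard as above
    #
    #   vsini = 15.0                    # km/s
    #   ub = 2. * np.pi * vsini * ss
    #   sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub**2) + 3. * np.sin(ub) / (2 * ub**3)
    #   sb[0] = 1.                      # DC term: power is redistributed, not lost
    #
    #   print(sb[:5])                   # taper falls off toward higher frequencies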
    def independent_sample(self, niter):
        '''
        Do the independent sampling specific to this echelle order, using the
        attached self.sampler (NuisanceSampler).

        :param niter: number of iterations to complete before returning to the
            master process.
        '''
        self.logger.debug("Beginning independent sampling on nuisance parameters")

        if self.lnprob:
            # If we have a current value, pass it to the sampler
            self.p0, self.lnprob, state = self.sampler.run_mcmc(pos0=self.p0, N=niter, lnprob0=self.lnprob)
        else:
            # Otherwise, start from the beginning
            self.p0, self.lnprob, state = self.sampler.run_mcmc(pos0=self.p0, N=niter)

        self.logger.debug("Finished independent sampling on nuisance parameters")
        # Don't return anything to the master process.

    def finish(self, *args):
        '''
        Wrap up the sampling and write the samples to disk.
        '''
        print(self.sampler.acceptance_fraction)
        print(self.sampler.acor)
        self.sampler.write()
        self.sampler.plot()  # triangle_plot=True
        print("There were {} exceptions.".format(len(self.exceptions)))
        # print out the values of each region key.
        for exception in self.exceptions:
            regions = exception["regions"]
            keys = sorted(regions)
            for key in keys:
                print(regions[key])
            cov = exception["cov"]
            print(cov)
            print("\n\n")

    def brain(self, conn):
        '''
        The infinite loop of the subprocess, which continues to listen for
        messages on the Pipe.
        '''
        self.conn = conn
        alive = True
        while alive:
            # Keep listening for messages put on the Pipe
            alive = self.interpret()
            # Once self.interpret() returns `False`, this loop will die.
        self.conn.send("DEAD")

    def interpret(self):
        '''
        Interpret the messages being put into the Pipe, and do something with
        them. Messages are always sent in a 2-arg tuple (fname, arg). Right now
        we only expect one function and one argument, but this could be
        generalized to *args.
        '''
        #info("brain")
        fname, arg = self.conn.recv()  # Waits here to receive a new message
        self.logger.debug("{} received message {}".format(os.getpid(), (fname, arg)))

        func = self.func_dict.get(fname, False)
        if func:
            response = func(arg)
        else:
            self.logger.info("Given an unknown function {}, assuming kill signal.".format(fname))
            return False

        # Functions only return a response other than None when they want it
        # communicated back to the master process. Some commands sent to the
        # child processes do not require a response to the main process.
        if response:
            self.logger.debug("{} sending back {}".format(os.getpid(), response))
            self.conn.send(response)

        return True
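# --- Example: driving an OrderModel from the master process ------------------
# A minimal sketch (hypothetical key and parameter values) of the message
# protocol that brain()/interpret() implement: fork a subprocess running
# brain(), send (fname, arg) tuples down the Pipe, and read back any response.
# An unrecognized fname acts as the kill signal. Assumes the module's global
# data structures (Instruments, DataSpectra, config, outdir) are already set up.
#
#   from multiprocessing import Process, Pipe
#
#   master, child = Pipe()
#   model = OrderModel(debug=True)
#   p = Process(target=model.brain, args=(child,))
#   p.start()
#
#   master.send(("INIT", (0, 0)))      # initialize spectrum 0, order 0
#   params = {"temp": 6000., "logg": 4.3, "Z": 0.0,
#             "vz": 0.0, "vsini": 5.0, "logOmega": 0.0}   # hypothetical values
#   master.send(("LNPROB", params))
#   print(master.recv())               # the evaluated lnprob (or -inf)
#   master.send(("DECIDE", False))     # reject: revert the proposal, no reply
#   master.send(("DIE", None))         # unknown fname -> kill signal
#   assert master.recv() == "DEAD"
#   p.join()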
sigma_mat = sigma**2 * np.eye(ndata)
mus, C_GP, data_mat = None, None, None

# For each star:
# In the config file, list the astroseismic parameters as the starting grid
# parameters. Read this into a ThetaParam object.
grid = np.array(Starfish.config["Theta"]["grid"])

# Now update the parameters for the emulator
# If pars are outside the grid, Emulator will raise C.ModelError
emulator.params = grid
mus, C_GP = emulator.matrix

npoly = Starfish.config["cheb_degree"]
chebyshevSpectrum = ChebyshevSpectrum(dataSpec, 0, npoly=npoly)
chebyshevSpectrum.update(np.array(Starfish.config["chebs"]))

def lnprob(p):
    vz, vsini, logOmega = p[:3]
    cheb = p[3:]

    chebyshevSpectrum.update(cheb)

    # Local, shifted copy of wavelengths
    wl_FFT = wl_FFT_orig * np.sqrt((C.c_kms + vz) / (C.c_kms - vz))

    # Holders to store the convolved and resampled eigenspectra
    eigenspectra = np.empty((pca.m, ndata))
    flux_mean = np.empty((ndata,))
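# --- Example: the flat parameter vector lnprob() expects ----------------------
# The lnprob() above (truncated in this source) takes a single flat vector:
# the first three entries are vz, vsini, and logOmega, and the remainder are
# the Chebyshev coefficients, whose count must match cheb_degree. A purely
# hypothetical call, assuming cheb_degree = 4 and the rest of the function
# body restored:
#
#   p = np.array([0.0, 5.0, 0.0,           # vz [km/s], vsini [km/s], logOmega
#                 0.0, 0.0, 0.0, 0.0])     # logc0, c1, c2, c3
#   print(lnprob(p))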