class Order:
    def __init__(self, debug=False):
        '''
        This object contains all of the variables necessary for the partial
        lnprob calculation for one echelle order. It is designed to first be
        instantiated within the main processes and then forked to other
        subprocesses. Once operating in the subprocess, the variables specific
        to the order are loaded with an `INIT` message call, which tells which key
        to initialize on in the `self.initialize()`.
        '''
        self.lnprob = -np.inf
        self.lnprob_last = -np.inf

        self.debug = debug

    def initialize(self, key):
        '''
        Bind this object to a single chunk of data (one echelle order).

        Looks up the instrument and data spectrum for the given key, creates a
        per-order logger and Chebyshev spectrum (seeded from a stored
        ``phi.json`` when one exists), opens the emulator, and allocates the
        arrays that later hold the convolved and resampled eigenspectra.

        :param key: (spectrum_id, order_key)
        :type key: (int, int)

        This method should only be called after all subprocess have been forked.
        '''
        # NOTE(review): this class body contains a second, identical
        # `initialize` definition further down; being the later one, that
        # definition is the one Python keeps.
        self.id = key
        raw_spectrum_id, self.order_key = key
        # Only spectrum_id is coerced to int; order_key is kept as passed in.
        self.spectrum_id = int(raw_spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        spectrum = self.dataSpectrum = DataSpectra[self.spectrum_id]
        okey = self.order_key
        self.wl = spectrum.wls[okey]
        self.fl = spectrum.fls[okey]
        self.sigma = spectrum.sigmas[okey]
        self.ndata = len(self.wl)
        self.mask = spectrum.masks[okey]
        self.order = int(spectrum.orders[okey])

        # Per-order logger, named "<ClassName> <order>".
        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
        self.logger.setLevel(logging.DEBUG if self.debug else logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(spectrum, okey, npoly=self.npoly)

        # Seed the Chebyshev coefficients from disk when a stored file exists.
        phi_path = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
        if os.path.exists(phi_path):
            self.logger.debug("Loading stored Chebyshev parameters.")
            self.chebyshevSpectrum.update(PhiParam.load(phi_path).cheb)

        self.counter = 0

        self.emulator = Emulator.open()
        self.emulator.determine_chunk_log(self.wl)

        pca = self.pca = self.emulator.pca
        self.wl_FFT = pca.wl

        # Row 0 is the mean flux, row 1 the flux std, the remaining rows are
        # the raw eigenspectra.
        self.EIGENSPECTRA = np.vstack((pca.flux_mean[np.newaxis, :], pca.flux_std[np.newaxis, :], pca.eigenspectra))

        self.ss = np.fft.rfftfreq(pca.npix, d=self.emulator.dv)
        # Junk value so that dividing by the zeroth frequency does not blow up.
        self.ss[0] = 0.01

        # Holders for the convolved and resampled eigenspectra.
        n = self.ndata
        self.eigenspectra = np.empty((pca.m, n))
        self.flux_mean = np.empty((n,))
        self.flux_std = np.empty((n,))
        self.flux_scalar = None

        self.sigma_mat = self.sigma**2 * np.eye(n)
        self.mus = self.C_GP = self.data_mat = None
        self.Omega = None

        self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

        # Output directory derived from the spectrum id and order.
        self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)

    def evaluate(self):
        '''
        Return the lnprob using the current version of the C_GP matrix, data matrix,
        and other intermediate products.
        '''

        self.lnprob_last = self.lnprob

        X = (self.flux_std * np.eye(self.ndata)).dot(self.eigenspectra.T)

        part1 = X.dot(self.C_GP.dot(X.T))
        part2 = self.data_mat
        CC = part2 + part1

        try:
            factor, flag = cho_factor(CC)
        except np.linalg.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            self.CC_debugger(CC)
            np.save('X.npy', X)
            np.save('part1.npy', part1)
            np.save('part2.npy', part2)
            np.save('flux_mean.npy', self.flux_mean)
            np.save('flux_std.npy', self.flux_std)
            np.save('C_GP.npy', self.C_GP)
            raise

        try:

            model1 = (self.flux_mean + X.dot(self.mus))
            R = self.fl - model1

            logdet = np.sum(2 * np.log((np.diag(factor))))
            self.lnprob = -0.5 * (np.dot(R, cho_solve((factor, flag), R)) + logdet)

            self.logger.debug("Evaluating lnprob={}".format(self.lnprob))
            return self.lnprob

        # To give us some debugging information about what went wrong.
        except np.linalg.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            raise

    def CC_debugger(self, CC):
        '''
        Special debugging information for the covariance matrix decomposition.
        '''
        print('{:-^60}'.format('CC_debugger'))
        print("See https://github.com/iancze/Starfish/issues/26")
        np.save('CC_matrix.npy', CC)
        print("Covariance matrix at a glance:")
        if (CC.diagonal().min() < 0.0):
            print("- Negative entries on the diagonal:")
            print("\t- Check sigAmp: should be positive")
            print("\t- Check uncertainty estimates: should all be positive")
        elif np.any(np.isnan(CC.diagonal())):
            print("- Covariance matrix has a NaN value on the diagonal")
        else:
            if not np.allclose(CC, CC.T):
                print("- The covariance matrix is highly asymmetric")

            #Still might have an asymmetric matrix below `allclose` threshold
            evals_CC, evecs_CC = np.linalg.eigh(CC)
            n_neg = (evals_CC < 0).sum()
            n_tot = len(evals_CC)
            print("- There are {} negative eigenvalues out of {}.".format(n_neg, n_tot))
            mark = lambda val: '>' if val < 0 else '.'

            print("Covariance matrix eigenvalues:")
            print(*["{: >6} {:{fill}>20.3e}".format(i, evals_CC[i], 
                                                    fill=mark(evals_CC[i])) for i in range(10)], sep='\n')
            print('{: >15}'.format('...'))
            print(*["{: >6} {:{fill}>20.3e}".format(n_tot-10+i, evals_CC[-10+i], 
                                                   fill=mark(evals_CC[-10+i])) for i in range(10)], sep='\n')
        print('{:-^60}'.format('-'))


    def update_Theta(self, p):
        '''
        Update the model to the current Theta parameters.

        The steps are: snapshot the current products into ``*_last``
        attributes, Doppler-shift the emulator wavelength grid by ``p.vz``,
        optionally broaden the stacked eigenspectra by ``p.vsini`` in Fourier
        space, resample them onto the data wavelengths, and finally pull the
        weights (``mus``, ``C_GP``) and flux scaling from the emulator.

        :param p: parameters to update model to
        :type p: model.ThetaParam
        :raises C.ModelError: if ``p.vsini`` is negative
        :raises RuntimeError: if the data wavelengths fall outside the shifted
            emulator wavelength grid
        '''
        # HACK: when the config has a "fix_logg" entry, overwrite the middle
        # grid parameter with it. This mutates the caller's `p` in place.
        fixed_logg = Starfish.config.get("fix_logg", None)
        if fixed_logg is not None:
            p.grid[1] = fixed_logg

        self.logger.debug("Updating Theta parameters to {}".format(p))

        # Keep the currently accepted products so a rejected proposal can be
        # rolled back. The arrays are copied because they are overwritten in
        # place below; mus and C_GP are rebound, so a reference is enough.
        self.flux_mean_last = self.flux_mean.copy()
        self.flux_std_last = self.flux_std.copy()
        self.eigenspectra_last = self.eigenspectra.copy()
        self.mus_last = self.mus
        self.C_GP_last = self.C_GP

        # Local, Doppler-shifted copy of the emulator wavelengths.
        shifted_wl = self.wl_FFT * np.sqrt((C.c_kms + p.vz) / (C.c_kms - p.vz))

        if p.vsini < 0.0:
            raise C.ModelError("vsini must be positive")

        if p.vsini < 0.2:
            # Below 0.2 km/s the grid spacing may cause problems, so the
            # broadening step is skipped entirely.
            broadened = self.EIGENSPECTRA.copy()
        else:
            spectrum_ft = np.fft.rfft(self.EIGENSPECTRA, axis=1)

            # Stellar broadening kernel in Fourier space.
            ub = 2. * np.pi * p.vsini * self.ss
            sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub ** 2) + 3. * np.sin(ub) / (2 * ub ** 3)
            # The zeroth (DC) term is set separately.
            sb[0] = 1.

            broadened = np.fft.irfft(spectrum_ft * sb, self.pca.npix, axis=1)

        # The data grid has to sit inside the shifted model grid.
        data_lo, data_hi = min(self.wl), max(self.wl)
        model_lo, model_hi = min(shifted_wl), max(shifted_wl)
        if data_lo < model_lo or data_hi > model_hi:
            raise RuntimeError("Data wl grid ({:.2f},{:.2f}) must fit within the range of wl_FFT ({:.2f},{:.2f})".format(data_lo, data_hi, model_lo, model_hi))

        # Resample each broadened row onto the data wavelengths, writing in
        # place into flux_mean, flux_std and then each eigenspectrum row.
        targets = chain([self.flux_mean, self.flux_std], self.eigenspectra)
        for lres, hres in zip(targets, broadened):
            lres[:] = InterpolatedUnivariateSpline(shifted_wl, hres, k=5)(self.wl)

        # Helps keep memory usage low between iterations.
        gc.collect()

        # Per the original note: if the parameters are outside the grid, the
        # emulator raises C.ModelError here.
        self.emulator.params = p.grid
        self.mus, self.C_GP = self.emulator.matrix
        self.flux_scalar = self.emulator.absolute_flux
        self.Omega = 10**p.logOmega

        scale = self.Omega * self.flux_scalar
        self.flux_mean *= scale
        self.flux_std *= scale
    def initialize(self, key):
        '''
        Bind this object to a single chunk of data (one echelle order).

        Looks up the instrument and data spectrum for the given key, creates a
        per-order logger and Chebyshev spectrum (seeded from a stored
        ``phi.json`` when one exists), opens the emulator, and allocates the
        arrays that later hold the convolved and resampled eigenspectra.

        :param key: (spectrum_id, order_key)
        :type key: (int, int)

        This method should only be called after all subprocess have been forked.
        '''
        # NOTE(review): this class body also contains an earlier, identical
        # `initialize` definition; being the later one, this definition is the
        # one Python keeps.
        self.id = key
        raw_spectrum_id, self.order_key = key
        # Only spectrum_id is coerced to int; order_key is kept as passed in.
        self.spectrum_id = int(raw_spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        spectrum = self.dataSpectrum = DataSpectra[self.spectrum_id]
        okey = self.order_key
        self.wl = spectrum.wls[okey]
        self.fl = spectrum.fls[okey]
        self.sigma = spectrum.sigmas[okey]
        self.ndata = len(self.wl)
        self.mask = spectrum.masks[okey]
        self.order = int(spectrum.orders[okey])

        # Per-order logger, named "<ClassName> <order>".
        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
        self.logger.setLevel(logging.DEBUG if self.debug else logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(spectrum, okey, npoly=self.npoly)

        # Seed the Chebyshev coefficients from disk when a stored file exists.
        phi_path = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
        if os.path.exists(phi_path):
            self.logger.debug("Loading stored Chebyshev parameters.")
            self.chebyshevSpectrum.update(PhiParam.load(phi_path).cheb)

        self.counter = 0

        self.emulator = Emulator.open()
        self.emulator.determine_chunk_log(self.wl)

        pca = self.pca = self.emulator.pca
        self.wl_FFT = pca.wl

        # Row 0 is the mean flux, row 1 the flux std, the remaining rows are
        # the raw eigenspectra.
        self.EIGENSPECTRA = np.vstack((pca.flux_mean[np.newaxis, :], pca.flux_std[np.newaxis, :], pca.eigenspectra))

        self.ss = np.fft.rfftfreq(pca.npix, d=self.emulator.dv)
        # Junk value so that dividing by the zeroth frequency does not blow up.
        self.ss[0] = 0.01

        # Holders for the convolved and resampled eigenspectra.
        n = self.ndata
        self.eigenspectra = np.empty((pca.m, n))
        self.flux_mean = np.empty((n,))
        self.flux_std = np.empty((n,))
        self.flux_scalar = None

        self.sigma_mat = self.sigma**2 * np.eye(n)
        self.mus = self.C_GP = self.data_mat = None
        self.Omega = None

        self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

        # Output directory derived from the spectrum id and order.
        self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)
Beispiel #3
0
class Order:
    def __init__(self, debug=False):
        '''
        This object contains all of the variables necessary for the partial
        lnprob calculation for one echelle order. It is designed to first be
        instantiated within the main processes and then forked to other
        subprocesses. Once operating in the subprocess, the variables specific
        to the order are loaded with an `INIT` message call, which tells which key
        to initialize on in the `self.initialize()`.
        '''
        self.lnprob = -np.inf
        self.lnprob_last = -np.inf

        self.func_dict = {"INIT": self.initialize,
                          "DECIDE": self.decide_Theta,
                          "INST": self.instantiate,
                          "LNPROB": self.lnprob_Theta,
                          "GET_LNPROB": self.get_lnprob,
                          "FINISH": self.finish,
                          "SAVE": self.save,
                          "OPTIMIZE_CHEB": self.optimize_Cheb
                          }

        self.debug = debug
        self.logger = logging.getLogger("{}".format(self.__class__.__name__))

    def initialize(self, key):
        '''
        Bind this object to a single chunk of data (one echelle order).

        Looks up the instrument and data spectrum for the given key, creates a
        per-order logger and Chebyshev spectrum (seeded from a stored
        ``phi.json`` when one exists), opens the emulator, and allocates the
        arrays that later hold the convolved and resampled eigenspectra.

        :param key: (spectrum_id, order_key)
        :type key: (int, int)

        This method should only be called after all subprocess have been forked.
        '''
        self.id = key
        raw_spectrum_id, self.order_key = key
        # Only spectrum_id is coerced to int; order_key is kept as passed in.
        self.spectrum_id = int(raw_spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        spectrum = self.dataSpectrum = DataSpectra[self.spectrum_id]
        okey = self.order_key
        self.wl = spectrum.wls[okey]
        self.fl = spectrum.fls[okey]
        self.sigma = spectrum.sigmas[okey]
        self.ndata = len(self.wl)
        self.mask = spectrum.masks[okey]
        self.order = int(spectrum.orders[okey])

        # Replace the generic logger with one named "<ClassName> <order>".
        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
        self.logger.setLevel(logging.DEBUG if self.debug else logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(spectrum, okey, npoly=self.npoly)

        # Seed the Chebyshev coefficients from disk when a stored file exists.
        phi_path = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
        if os.path.exists(phi_path):
            self.logger.debug("Loading stored Chebyshev parameters.")
            self.chebyshevSpectrum.update(PhiParam.load(phi_path).cheb)

        # Bounded store for the most recent residual spectra, for averaging.
        self.resid_deque = deque(maxlen=500)
        self.counter = 0

        self.emulator = Emulator.open()
        self.emulator.determine_chunk_log(self.wl)

        pca = self.pca = self.emulator.pca
        self.wl_FFT = pca.wl

        # Row 0 is the mean flux, row 1 the flux std, the remaining rows are
        # the raw eigenspectra.
        self.EIGENSPECTRA = np.vstack((pca.flux_mean[np.newaxis, :], pca.flux_std[np.newaxis, :], pca.eigenspectra))

        self.ss = np.fft.rfftfreq(pca.npix, d=self.emulator.dv)
        # Junk value so that dividing by the zeroth frequency does not blow up.
        self.ss[0] = 0.01

        # Holders for the convolved and resampled eigenspectra.
        n = self.ndata
        self.eigenspectra = np.empty((pca.m, n))
        self.flux_mean = np.empty((n,))
        self.flux_std = np.empty((n,))

        self.sigma_mat = self.sigma**2 * np.eye(n)
        self.mus = self.C_GP = self.data_mat = None

        self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

        # Output directory derived from the spectrum id and order.
        self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)

    def instantiate(self, *args):
        '''
        If mixing Theta and Phi optimization/sampling, perform the sigma clipping
        operation to instantiate covariant regions to cover outliers.

        May involve creating a new NuisanceSampler.
        '''
        raise NotImplementedError

    def get_lnprob(self, *args):
        '''
        Return the *current* value of lnprob.

        Intended to be called from the master process to
        query the child processes for their current value of lnprob.
        '''
        return self.lnprob

    def lnprob_Theta(self, p):
        '''
        Move the model to the Theta parameters `p` and evaluate the lnprob.

        Intended to be called from the master process via the command "LNPROB".

        :param p: parameters handed on to `update_Theta()`
        :returns: the value from `evaluate()` (which also stores it in
            ``self.lnprob``), or ``-np.inf`` when a ``C.ModelError`` is raised
            along the way.
        '''
        try:
            self.update_Theta(p)
            result = self.evaluate()
        except C.ModelError:
            self.logger.debug("ModelError in stellar parameters, sending back -np.inf {}".format(p))
            return -np.inf
        return result

    def evaluate(self):
        '''
        Return the lnprob using the current version of the C_GP matrix, data matrix,
        and other intermediate products.
        '''

        self.lnprob_last = self.lnprob

        X = (self.chebyshevSpectrum.k * self.flux_std * np.eye(self.ndata)).dot(self.eigenspectra.T)

        CC = X.dot(self.C_GP.dot(X.T)) + self.data_mat

        try:
            factor, flag = cho_factor(CC)
        except np.linalg.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            self.CC_debugger(CC)
            raise

        try:
            R = self.fl - self.chebyshevSpectrum.k * self.flux_mean - X.dot(self.mus)

            logdet = np.sum(2 * np.log((np.diag(factor))))
            self.lnprob = -0.5 * (np.dot(R, cho_solve((factor, flag), R)) + logdet)

            self.logger.debug("Evaluating lnprob={}".format(self.lnprob))
            return self.lnprob

        # To give us some debugging information about what went wrong.
        except np.linalg.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            raise

    def CC_debugger(self, CC):
        '''
        Special debugging information for the covariance matrix decomposition.
        '''
        print('{:-^60}'.format('CC_debugger'))
        print("See https://github.com/iancze/Starfish/issues/26")
        print("Covariance matrix at a glance:")
        if (CC.diagonal().min() < 0.0):
            print("- Negative entries on the diagonal:")
            print("\t- Check sigAmp: should be positive")
            print("\t- Check uncertainty estimates: should all be positive")
        elif np.any(np.isnan(CC.diagonal())):
            print("- Covariance matrix has a NaN value on the diagonal")
        else:
            if not np.allclose(CC, CC.T):
                print("- The covariance matrix is highly asymmetric")

            #Still might have an asymmetric matrix below `allclose` threshold
            evals_CC, evecs_CC = np.linalg.eigh(CC)
            n_neg = (evals_CC < 0).sum()
            n_tot = len(evals_CC)
            print("- There are {} negative eigenvalues out of {}.".format(n_neg, n_tot))
            mark = lambda val: '>' if val < 0 else '.'

            print("Covariance matrix eigenvalues:")
            print(*["{: >6} {:{fill}>20.3e}".format(i, evals_CC[i], 
                                                    fill=mark(evals_CC[i])) for i in range(10)], sep='\n')
            print('{: >15}'.format('...'))
            print(*["{: >6} {:{fill}>20.3e}".format(n_tot-10+i, evals_CC[-10+i], 
                                                   fill=mark(evals_CC[-10+i])) for i in range(10)], sep='\n')
        print('{:-^60}'.format('-'))

    def update_Theta(self, p):
        '''
        Update the model to the current Theta parameters.

        The steps are: snapshot the current products into ``*_last``
        attributes, Doppler-shift the emulator wavelength grid by ``p.vz``,
        optionally broaden the stacked eigenspectra by ``p.vsini`` in Fourier
        space, resample them onto the data wavelengths, scale the mean and std
        flux by ``10**p.logOmega``, and finally pull the weights (``mus``,
        ``C_GP``) from the emulator.

        :param p: parameters to update model to
        :type p: model.ThetaParam
        :raises C.ModelError: if ``p.vsini`` is negative
        :raises RuntimeError: if the data wavelengths fall outside the shifted
            emulator wavelength grid
        '''

        # HACK: when the config has a "fix_logg" entry, overwrite the middle
        # grid parameter with it. This mutates the caller's `p` in place.
        fix_logg = Starfish.config.get("fix_logg", None)
        if fix_logg is not None:
            p.grid[1] = fix_logg
        # Reported through the logger (DEBUG) rather than printed, so each
        # likelihood call no longer writes to stdout unconditionally.
        self.logger.debug("grid pars are {}".format(p.grid))

        self.logger.debug("Updating Theta parameters to {}".format(p))

        # Store the current accepted values before overwriting with new proposed values.
        # The arrays are copied because they are overwritten in place below.
        self.flux_mean_last = self.flux_mean.copy()
        self.flux_std_last = self.flux_std.copy()
        self.eigenspectra_last = self.eigenspectra.copy()
        self.mus_last = self.mus
        self.C_GP_last = self.C_GP

        # Local, shifted copy of wavelengths
        wl_FFT = self.wl_FFT * np.sqrt((C.c_kms + p.vz) / (C.c_kms - p.vz))

        # If vsini is less than 0.2 km/s, we might run into issues with
        # the grid spacing. Therefore skip the convolution step if we have
        # values smaller than this.
        # FFT and convolve operations
        if p.vsini < 0.0:
            raise C.ModelError("vsini must be positive")
        elif p.vsini < 0.2:
            # Skip the vsini taper due to instrumental effects
            eigenspectra_full = self.EIGENSPECTRA.copy()
        else:
            FF = np.fft.rfft(self.EIGENSPECTRA, axis=1)

            # Determine the stellar broadening kernel
            ub = 2. * np.pi * p.vsini * self.ss
            sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub ** 2) + 3. * np.sin(ub) / (2 * ub ** 3)
            # set zeroth frequency to 1 separately (DC term)
            sb[0] = 1.

            # institute vsini taper
            FF_tap = FF * sb

            # do ifft
            eigenspectra_full = np.fft.irfft(FF_tap, self.pca.npix, axis=1)

        # Spectrum resample operations
        if min(self.wl) < min(wl_FFT) or max(self.wl) > max(wl_FFT):
            raise RuntimeError("Data wl grid ({:.2f},{:.2f}) must fit within the range of wl_FFT ({:.2f},{:.2f})".format(min(self.wl), max(self.wl), min(wl_FFT), max(wl_FFT)))

        # Take the output from the FFT operation (eigenspectra_full), and stuff them
        # into respective data products: flux_mean, flux_std, then each
        # eigenspectrum row, all written in place.
        for lres, hres in zip(chain([self.flux_mean, self.flux_std], self.eigenspectra), eigenspectra_full):
            interp = InterpolatedUnivariateSpline(wl_FFT, hres, k=5)
            lres[:] = interp(self.wl)
            del interp

        # Helps keep memory usage low, seems like the numpy routine is slow
        # to clear allocated memory for each iteration.
        gc.collect()

        # Adjust flux_mean and flux_std by Omega
        Omega = 10**p.logOmega
        self.flux_mean *= Omega
        self.flux_std *= Omega

        # Now update the parameters from the emulator
        # If pars are outside the grid, Emulator will raise C.ModelError
        self.emulator.params = p.grid
        self.mus, self.C_GP = self.emulator.matrix

    def revert_Theta(self):
        '''
        Revert the status of the model from a rejected Theta proposal.
        '''

        self.logger.debug("Reverting Theta parameters")

        self.lnprob = self.lnprob_last

        self.flux_mean = self.flux_mean_last
        self.flux_std = self.flux_std_last
        self.eigenspectra = self.eigenspectra_last

        self.mus = self.mus_last
        self.C_GP = self.C_GP_last

    def decide_Theta(self, yes):
        '''
        Interpret the decision from the master process to either revert the
        Theta model (rejected parameters) or move on (accepted parameters).

        :param yes: if True, accept stellar parameters.
        :type yes: boolean
        '''
        if yes:
            # accept and move on
            self.logger.debug("Deciding to accept Theta parameters")
        else:
            # revert and move on
            self.logger.debug("Deciding to revert Theta parameters")
            self.revert_Theta()

        # Proceed with independent sampling
        self.independent_sample(1)

    def optimize_Cheb(self, *args):
        '''
        Keeping the current Theta parameters fixed and assuming white noise,
        optimize the Chebyshev parameters.

        Starts from all-zero coefficients, minimizes ``-lnprob`` with
        `scipy.optimize.fmin`, prints progress to stdout, and saves the result
        as a `PhiParam`.

        :param args: ignored
        '''
        # Mirror the Chebyshev object's fix_c0 flag; when c0 is fixed there is
        # one fewer free coefficient.
        if self.chebyshevSpectrum.fix_c0:
            n_free = self.npoly - 1
            self.fix_c0 = True
        else:
            n_free = self.npoly
            self.fix_c0 = False
        p0 = np.zeros(n_free)

        def objective(coeffs):
            # fmin minimizes, so hand back -lnprob; a -inf lnprob becomes a
            # large finite penalty.
            self.chebyshevSpectrum.update(coeffs)
            lnp = self.evaluate()
            print(self.order, coeffs, lnp)
            return 1e99 if lnp == -np.inf else -lnp

        from scipy.optimize import fmin
        best = fmin(objective, p0, maxiter=10000, maxfun=10000)
        print(self.order, best)

        # Due to a JSON bug, np.int64 type objects will get read twice,
        # and cause this routine to fail. Therefore we have to be careful
        # to convert these to ints.
        PhiParam(
            spectrum_id=int(self.spectrum_id),
            order=int(self.order),
            fix_c0=self.chebyshevSpectrum.fix_c0,
            cheb=best,
        ).save()

    def update_Phi(self, p):
        '''
        Update the Phi parameters and data covariance matrix.

        :param params: large dictionary containing cheb, cov, and regions
        '''

        raise NotImplementedError

    def revert_Phi(self, *args):
        '''
        Revert all products from the nuisance parameters, including the data
        covariance matrix.
        '''

        self.logger.debug("Reverting Phi parameters")

        self.lnprob = self.lnprob_last

        self.chebyshevSpectrum.revert()
        self.data_mat = self.data_mat_last

    def clear_resid_deque(self):
        '''
        Clear the accumulated residual spectra.
        '''
        self.resid_deque.clear()

    def independent_sample(self, niter):
        '''
        Do the independent sampling specific to this echelle order, using the
        attached self.sampler (NuisanceSampler).

        :param niter: number of iterations to complete before returning to master process.

        '''

        self.logger.debug("Beginning independent sampling on Phi parameters")

        if self.lnprob:
            # If we have a current value, pass it to the sampler
            self.p0, self.lnprob, state = self.sampler.run_mcmc(pos0=self.p0, N=niter, lnprob0=self.lnprob)
        else:
            # Otherwise, start from the beginning
            self.p0, self.lnprob, state = self.sampler.run_mcmc(pos0=self.p0, N=niter)

        self.logger.debug("Finished independent sampling on Phi parameters")
        # Don't return anything to the master process.

    def finish(self, *args):
        '''
        Wrap up the sampling and write the samples to disk.
        '''
        self.logger.debug("Finishing")

    def brain(self, conn):
        '''
        The infinite loop of the subprocess, which continues to listen for
        messages on the pipe.
        '''
        self.conn = conn
        alive = True
        while alive:
            #Keep listening for messages put on the Pipe
            alive = self.interpret()
            #Once self.interpret() returns `False`, this loop will die.
        self.conn.send("DEAD")

    def interpret(self):
        '''
        Interpret the messages being put into the Pipe, and do something with
        them. Messages are always sent in a 2-arg tuple (fname, arg)
        Right now we only expect one function and one argument but this could
        be generalized to **args.
        '''
        #info("brain")

        fname, arg = self.conn.recv() # Waits here to receive a new message
        self.logger.debug("{} received message {}".format(os.getpid(), (fname, arg)))

        func = self.func_dict.get(fname, False)
        if func:
            response = func(arg)
        else:
            self.logger.info("Given an unknown function {}, assuming kill signal.".format(fname))
            return False

        # Functions only return a response other than None when they want them
        # communicated back to the master process.
        # Some commands sent to the child processes do not require a response
        # to the main process.
        if response:
            self.logger.debug("{} sending back {}".format(os.getpid(), response))
            self.conn.send(response)
        return True

    def save(self, *args):
        '''
        Using the current values for flux, write out the data, mean model, and mean
        residuals into a JSON.

        The file is named ``Starfish.specfmt.format(spectrum_id, order)``
        followed by ``"spec.json"`` and holds the keys ``wl``, ``data``,
        ``model``, ``resid``, ``sigma``, ``spectrum_id`` and ``order``.

        :param args: ignored
        '''

        X = (self.chebyshevSpectrum.k * self.flux_std * np.eye(self.ndata)).dot(self.eigenspectra.T)

        model = self.chebyshevSpectrum.k * self.flux_mean + X.dot(self.mus)
        resid = self.fl - model

        my_dict = {"wl":self.wl.tolist(), "data":self.fl.tolist(), "model":model.tolist(), "resid":resid.tolist(), "sigma":self.sigma.tolist(), "spectrum_id":self.spectrum_id, "order":self.order}

        fname = Starfish.specfmt.format(self.spectrum_id, self.order)
        # The context manager closes the file even if json.dump raises.
        with open(fname + "spec.json", 'w') as f:
            json.dump(my_dict, f, indent=2, sort_keys=True)
Beispiel #4
0
    def initialize(self, key):
        '''
        Bind this object to a single chunk of data (one echelle order).

        Looks up the instrument and data spectrum for the given key, creates a
        per-order logger and Chebyshev spectrum (seeded from a stored
        ``phi.json`` when one exists), opens the emulator, and allocates the
        arrays that later hold the convolved and resampled eigenspectra.

        :param key: (spectrum_id, order_key)
        :type key: (int, int)

        This method should only be called after all subprocess have been forked.
        '''
        self.id = key
        raw_spectrum_id, self.order_key = key
        # Only spectrum_id is coerced to int; order_key is kept as passed in.
        self.spectrum_id = int(raw_spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        spectrum = self.dataSpectrum = DataSpectra[self.spectrum_id]
        okey = self.order_key
        self.wl = spectrum.wls[okey]
        self.fl = spectrum.fls[okey]
        self.sigma = spectrum.sigmas[okey]
        self.ndata = len(self.wl)
        self.mask = spectrum.masks[okey]
        self.order = int(spectrum.orders[okey])

        # Per-order logger, named "<ClassName> <order>".
        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
        self.logger.setLevel(logging.DEBUG if self.debug else logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(spectrum, okey, npoly=self.npoly)

        # Seed the Chebyshev coefficients from disk when a stored file exists.
        phi_path = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
        if os.path.exists(phi_path):
            self.logger.debug("Loading stored Chebyshev parameters.")
            self.chebyshevSpectrum.update(PhiParam.load(phi_path).cheb)

        # Bounded store for the most recent residual spectra, for averaging.
        self.resid_deque = deque(maxlen=500)
        self.counter = 0

        self.emulator = Emulator.open()
        self.emulator.determine_chunk_log(self.wl)

        pca = self.pca = self.emulator.pca
        self.wl_FFT = pca.wl

        # Row 0 is the mean flux, row 1 the flux std, the remaining rows are
        # the raw eigenspectra.
        self.EIGENSPECTRA = np.vstack((pca.flux_mean[np.newaxis, :], pca.flux_std[np.newaxis, :], pca.eigenspectra))

        self.ss = np.fft.rfftfreq(pca.npix, d=self.emulator.dv)
        # Junk value so that dividing by the zeroth frequency does not blow up.
        self.ss[0] = 0.01

        # Holders for the convolved and resampled eigenspectra.
        n = self.ndata
        self.eigenspectra = np.empty((pca.m, n))
        self.flux_mean = np.empty((n,))
        self.flux_std = np.empty((n,))

        self.sigma_mat = self.sigma**2 * np.eye(n)
        self.mus = self.C_GP = self.data_mat = None

        self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

        # Output directory derived from the spectrum id and order.
        self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)
Beispiel #5
0
class Order:
    def __init__(self, debug=False):
        '''
        This object contains all of the variables necessary for the partial
        lnprob calculation for one echelle order. It is designed to first be
        instantiated within the main processes and then forked to other
        subprocesses. Once operating in the subprocess, the variables specific
        to the order are loaded with an `INIT` message call, which tells which key
        to initialize on in the `self.initialize()`.
        '''
        self.lnprob = -np.inf
        self.lnprob_last = -np.inf

        self.func_dict = {
            "INIT": self.initialize,
            "DECIDE": self.decide_Theta,
            "INST": self.instantiate,
            "LNPROB": self.lnprob_Theta,
            "GET_LNPROB": self.get_lnprob,
            "FINISH": self.finish,
            "SAVE": self.save,
            "OPTIMIZE_CHEB": self.optimize_Cheb
        }

        self.debug = debug
        self.logger = logging.getLogger("{}".format(self.__class__.__name__))

    def initialize(self, key):
        '''
        Attach this object to one chunk of data (a single echelle order).

        :param key: (spectrum_id, order_key)
        :param type: (int, int)

        Only call this once all of the subprocesses have been forked.
        '''
        self.id = key
        raw_spectrum_id, self.order_key = self.id
        # The spectrum id is used to index Instruments and DataSpectra, so
        # force it to a plain int.
        self.spectrum_id = int(raw_spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        self.dataSpectrum = DataSpectra[self.spectrum_id]

        # Pull the arrays belonging to this order out of the data spectrum.
        spec, okey = self.dataSpectrum, self.order_key
        self.wl = spec.wls[okey]
        self.fl = spec.fls[okey]
        self.sigma = spec.sigmas[okey]
        self.ndata = len(self.wl)
        self.mask = spec.masks[okey]
        self.order = int(spec.orders[okey])

        # From here on, log under a name that carries the order number.
        logger_name = "{} {}".format(self.__class__.__name__, self.order)
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(logging.DEBUG if self.debug else logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(
            self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(
            self.dataSpectrum, self.order_key, npoly=self.npoly)

        # If a stored parameter file exists, start from its Chebyshev values.
        phi_path = Starfish.specfmt.format(self.spectrum_id,
                                           self.order) + "phi.json"
        if os.path.exists(phi_path):
            self.logger.debug("Loading stored Chebyshev parameters.")
            self.chebyshevSpectrum.update(PhiParam.load(phi_path).cheb)

        # Bounded history of the last residual spectra, kept for averaging.
        self.resid_deque = deque(maxlen=500)
        self.counter = 0

        self.interpolator = Interpolator(self.wl, HDF5Interface())
        self.flux = None  # assigned by update_Theta

        self.wl_FFT = self.interpolator.wl

        # Frequency axis matching an rfft taken over the wl_FFT grid.
        nfft = len(self.wl_FFT)
        self.ss = np.fft.rfftfreq(nfft, d=self.interpolator.interface.dv)
        self.ss[0] = 0.01  # junk so we don't get a divide by zero error

        self.sigma_mat = self.sigma**2 * np.eye(self.ndata)

        self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

        # Update the outdir based upon id
        self.noutdir = Starfish.routdir + "{}/{}/".format(
            self.spectrum_id, self.order)

    def instantiate(self, *args):
        '''
        If mixing Theta and Phi optimization/sampling, perform the sigma clipping
        operation to instantiate covariant regions to cover outliers.

        May involve creating a new NuisanceSampler.
        '''
        raise NotImplementedError

    def get_lnprob(self, *args):
        '''
        Return the *current* value of lnprob.

        Intended to be called from the master process to
        query the child processes for their current value of lnprob.
        '''
        return self.lnprob

    def lnprob_Theta(self, p):
        '''
        Update the model to the Theta parameters and then evaluate the lnprob.

        Intended to be called from the master process via the command "LNPROB".
        '''
        try:
            self.update_Theta(p)
            lnp = self.evaluate()  # Also sets self.lnprob to new value
            return lnp
        except (C.ModelError, C.InterpolationError):
            self.logger.debug(
                "ModelError in stellar parameters, sending back -np.inf {}".
                format(p))
            return -np.inf

    def evaluate(self):
        '''
        Return the lnprob using the current version of the C_GP matrix, data matrix,
        and other intermediate products.
        '''

        self.lnprob_last = self.lnprob

        CC = self.data_mat

        model = self.chebyshevSpectrum.k * self.flux

        try:

            factor, flag = cho_factor(CC)

            R = self.fl - model

            logdet = np.sum(2 * np.log((np.diag(factor))))
            self.lnprob = -0.5 * (np.dot(R, cho_solve(
                (factor, flag), R)) + logdet)

            self.logger.debug("Evaluating lnprob={}".format(self.lnprob))
            return self.lnprob

        # To give us some debugging information about what went wrong.
        except np.linalg.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            raise

    def update_Theta(self, p):
        '''
        Update the model flux (`self.flux`) to the current Theta parameters.

        The steps are: interpolate a raw spectrum at `p.grid`, optionally
        apply a rotational broadening taper in Fourier space, resample onto
        the data wavelengths through a Doppler-shifted copy of `self.wl_FFT`,
        and scale by `10**p.logOmega`. The previous flux is kept in
        `self.flux_last` so that `revert_Theta` can restore it.

        :param p: parameters to update model to; the attributes read here are
            `grid`, `vz`, `vsini` and `logOmega`.
        :type p: model.ThetaParam

        :raises C.ModelError: if `p.vsini` is negative.
        :raises RuntimeError: if the data wavelengths are not contained in the
            shifted wavelength grid.
        '''

        # Dirty hack: when the config provides "fix_logg", the second grid
        # entry of the proposal is overwritten in place, so the caller's `p`
        # is modified as well.
        fix_logg = Starfish.config.get("fix_logg", None)
        if fix_logg is not None:
            p.grid[1] = fix_logg
        print("grid pars are", p.grid)

        self.logger.debug("Updating Theta parameters to {}".format(p))

        # Store the current accepted values before overwriting with new proposed values.
        self.flux_last = self.flux

        # Local, shifted copy of wavelengths: every wavelength is multiplied
        # by sqrt((c + vz) / (c - vz)); C.c_kms is presumably the speed of
        # light in km/s (TODO confirm against the constants module).
        wl_FFT = self.wl_FFT * np.sqrt((C.c_kms + p.vz) / (C.c_kms - p.vz))

        flux_raw = self.interpolator(p.grid)

        # If vsini is less than 0.2 km/s, we might run into issues with
        # the grid spacing. Therefore skip the convolution step if we have
        # values smaller than this.
        # FFT and convolve operations
        # NOTE: only negative values are rejected; vsini == 0 falls into the
        # "skip the convolution" branch despite the wording of the message.
        if p.vsini < 0.0:
            raise C.ModelError("vsini must be positive")
        elif p.vsini < 0.2:
            # Skip the vsini taper due to instrumental effects
            flux_taper = flux_raw
        else:
            FF = np.fft.rfft(flux_raw)

            # Determine the stellar broadening kernel, evaluated on the
            # frequency axis self.ss
            ub = 2. * np.pi * p.vsini * self.ss
            sb = j1(ub) / ub - 3 * np.cos(ub) / (
                2 * ub**2) + 3. * np.sin(ub) / (2 * ub**3)
            # set zeroth frequency to 1 separately (DC term); self.ss[0] holds
            # a placeholder value, so sb[0] computed above is not meaningful
            sb[0] = 1.

            # institute vsini taper
            FF_tap = FF * sb

            # do ifft, back onto a grid with len(self.wl_FFT) samples
            flux_taper = np.fft.irfft(FF_tap, len(self.wl_FFT))

        # Spectrum resample operations: the data wavelengths must lie inside
        # the shifted grid before the spline below is evaluated on them.
        if min(self.wl) < min(wl_FFT) or max(self.wl) > max(wl_FFT):
            raise RuntimeError(
                "Data wl grid ({:.2f},{:.2f}) must fit within the range of wl_FFT ({:.2f},{:.2f})"
                .format(min(self.wl), max(self.wl), min(wl_FFT), max(wl_FFT)))

        # Fit a degree-5 spline through the (shifted wavelength, flux) pairs
        # and evaluate it at the data wavelengths.
        interp = InterpolatedUnivariateSpline(wl_FFT, flux_taper, k=5)
        self.flux = interp(self.wl)
        del interp

        # Force a garbage collection pass after dropping the spline object.
        gc.collect()

        # Scale the resampled flux in place by Omega = 10**logOmega
        Omega = 10**p.logOmega
        self.flux *= Omega

    def revert_Theta(self):
        '''
        Revert the status of the model from a rejected Theta proposal.
        '''

        self.logger.debug("Reverting Theta parameters")

        self.lnprob = self.lnprob_last

        self.flux = self.flux_last

    def decide_Theta(self, yes):
        '''
        Interpret the decision from the master process to either revert the
        Theta model (rejected parameters) or move on (accepted parameters).

        :param yes: if True, accept stellar parameters.
        :type yes: boolean
        '''
        if yes:
            # accept and move on
            self.logger.debug("Deciding to accept Theta parameters")
        else:
            # revert and move on
            self.logger.debug("Deciding to revert Theta parameters")
            self.revert_Theta()

        # Proceed with independent sampling
        self.independent_sample(1)

    def optimize_Cheb(self, *args):
        '''
        Keeping the current Theta parameters fixed, optimize the Chebyshev
        parameters with a downhill simplex search and save the result.

        The objective is `-self.evaluate()`, so `self.data_mat` and
        `self.flux` must already be set. The best-fit coefficients are written
        out through `PhiParam.save()`. Any arguments are ignored.
        '''

        # c0 stays fixed only when that was requested AND there is more than
        # one order; with a single order every coefficient is free.
        # This is necessary if we want to update just a single order.
        # (`and` is required here: `a & b > 1` parses as `(a & b) > 1`, which
        # is never true when `a` is a boolean.)
        if self.chebyshevSpectrum.fix_c0 and len(self.dataSpectrum.wls) > 1:
            p0 = np.zeros((self.npoly - 1))
        else:
            self.chebyshevSpectrum.fix_c0 = False
            p0 = np.zeros((self.npoly))

        def fprob(p):
            # Objective for the minimizer: negative lnprob at coefficients p.
            self.chebyshevSpectrum.update(p)
            lnp = self.evaluate()
            print(self.order, p, lnp)
            if lnp == -np.inf:
                # Large finite penalty so the simplex can keep comparing values.
                return 1e99
            else:
                return -lnp

        from scipy.optimize import fmin
        result = fmin(fprob, p0, maxiter=10000, maxfun=10000)
        print(self.order, result)

        # Due to a JSON bug, np.int64 type objects will get read twice,
        # and cause this routine to fail. Therefore we have to be careful
        # to convert these to ints.
        phi = PhiParam(spectrum_id=int(self.spectrum_id),
                       order=int(self.order),
                       fix_c0=self.chebyshevSpectrum.fix_c0,
                       cheb=result)
        phi.save()

    def update_Phi(self, p):
        '''
        Update the Phi parameters and data covariance matrix.

        :param params: large dictionary containing cheb, cov, and regions
        '''

        raise NotImplementedError

    def revert_Phi(self, *args):
        '''
        Revert all products from the nuisance parameters, including the data
        covariance matrix.
        '''

        self.logger.debug("Reverting Phi parameters")

        self.lnprob = self.lnprob_last

        self.chebyshevSpectrum.revert()
        self.data_mat = self.data_mat_last

    def clear_resid_deque(self):
        '''
        Clear the accumulated residual spectra.
        '''
        self.resid_deque.clear()

    def independent_sample(self, niter):
        '''
        Do the independent sampling specific to this echelle order, using the
        attached self.sampler (NuisanceSampler).

        :param niter: number of iterations to complete before returning to master process.

        '''

        self.logger.debug("Beginning independent sampling on Phi parameters")

        if self.lnprob:
            # If we have a current value, pass it to the sampler
            self.p0, self.lnprob, state = self.sampler.run_mcmc(
                pos0=self.p0, N=niter, lnprob0=self.lnprob)
        else:
            # Otherwise, start from the beginning
            self.p0, self.lnprob, state = self.sampler.run_mcmc(pos0=self.p0,
                                                                N=niter)

        self.logger.debug("Finished independent sampling on Phi parameters")
        # Don't return anything to the master process.

    def finish(self, *args):
        '''
        Wrap up the sampling and write the samples to disk.
        '''
        self.logger.debug("Finishing")

    def brain(self, conn):
        '''
        The infinite loop of the subprocess, which continues to listen for
        messages on the pipe.
        '''
        self.conn = conn
        alive = True
        while alive:
            #Keep listening for messages put on the Pipe
            alive = self.interpret()
            #Once self.interpret() returns `False`, this loop will die.
        self.conn.send("DEAD")

    def interpret(self):
        '''
        Interpret the messages being put into the Pipe, and do something with
        them. Messages are always sent in a 2-arg tuple (fname, arg)
        Right now we only expect one function and one argument but this could
        be generalized to **args.
        '''
        #info("brain")

        fname, arg = self.conn.recv()  # Waits here to receive a new message
        self.logger.debug("{} received message {}".format(
            os.getpid(), (fname, arg)))

        func = self.func_dict.get(fname, False)
        if func:
            response = func(arg)
        else:
            self.logger.info(
                "Given an unknown function {}, assuming kill signal.".format(
                    fname))
            return False

        # Functions only return a response other than None when they want them
        # communicated back to the master process.
        # Some commands sent to the child processes do not require a response
        # to the main process.
        if response:
            self.logger.debug("{} sending back {}".format(
                os.getpid(), response))
            self.conn.send(response)
        return True

    def save(self, *args):
        '''
        Using the current values for flux, write out the data, model, and
        residuals (data minus `self.flux`) into a JSON file.

        The file name is `Starfish.specfmt` formatted with the spectrum id and
        order, followed by "spec.json". Any arguments are ignored.
        '''

        resid = self.fl - self.flux

        my_dict = {
            "wl": self.wl.tolist(),
            "data": self.fl.tolist(),
            "model": self.flux.tolist(),
            "resid": resid.tolist(),
            "sigma": self.sigma.tolist(),
            "spectrum_id": self.spectrum_id,
            "order": self.order
        }

        fname = Starfish.specfmt.format(self.spectrum_id, self.order)
        # The context manager closes the file even if serialization fails.
        with open(fname + "spec.json", 'w') as f:
            json.dump(my_dict, f, indent=2, sort_keys=True)
# sigma**2 broadcast against an identity matrix of size ndata; presumably
# sigma is a 1-D array of length ndata, giving a diagonal matrix of
# variances (TODO confirm where sigma and ndata are defined).
sigma_mat = sigma**2 * np.eye(ndata)
# Placeholders; mus and C_GP are overwritten below from the emulator.
mus, C_GP, data_mat = None, None, None

# For each star


# In the config file, list the astroseismic parameters as the starting grid parameters
# Read the starting grid parameters from the config into a NumPy array
grid = np.array(Starfish.config["Theta"]["grid"])
# Now update the parameters for the emulator
# If pars are outside the grid, Emulator will raise C.ModelError
emulator.params = grid
mus, C_GP = emulator.matrix

# Chebyshev spectrum built for order key 0 of dataSpec, then set to the
# coefficients stored under "chebs" in the config.
npoly = Starfish.config["cheb_degree"]
chebyshevSpectrum = ChebyshevSpectrum(dataSpec, 0, npoly=npoly)
chebyshevSpectrum.update(np.array(Starfish.config["chebs"]))

def lnprob(p):
    '''
    Start of the lnprob evaluation for a parameter vector.

    :param p: sequence whose first three entries are vz, vsini and logOmega;
        all remaining entries are passed to `chebyshevSpectrum.update`.

    NOTE(review): the visible body ends after allocating the holder arrays
    and has no return statement; the rest appears to be missing.
    '''
    vz, vsini, logOmega = p[:3]
    cheb = p[3:]

    chebyshevSpectrum.update(cheb)

    # Local, shifted copy of wavelengths (scaled by sqrt((c + vz) / (c - vz)))
    wl_FFT = wl_FFT_orig * np.sqrt((C.c_kms + vz) / (C.c_kms - vz))

    # Holders to store the convolved and resampled eigenspectra
    # (uninitialized arrays; nothing in the visible code fills them)
    eigenspectra = np.empty((pca.m, ndata))
    flux_mean = np.empty((ndata,))
    flux_std = np.empty((ndata,))
Beispiel #7
0
    def initialize(self, key):
        '''
        Initialize the OrderModel to the correct chunk of data (echelle order).

        Loads the data arrays for the order, opens the emulator and truncates
        it to the order's wavelength range, allocates the holders for the
        resampled components, and creates the NuisanceSampler that samples the
        Chebyshev and covariance parameters for this order.

        :param key: (spectrum_id, order_id)
        :param type: (int, int)

        This should only be called after all subprocess have been forked.
        '''

        self.id = key
        self.spectrum_id, self.order_id = self.id

        self.instrument = Instruments[self.spectrum_id]
        self.DataSpectrum = DataSpectra[self.spectrum_id]
        self.wl = self.DataSpectrum.wls[self.order_id]
        self.fl = self.DataSpectrum.fls[self.order_id]
        self.sigma = self.DataSpectrum.sigmas[self.order_id]
        self.npoints = len(self.wl)
        self.mask = self.DataSpectrum.masks[self.order_id]
        self.order = self.DataSpectrum.orders[self.order_id]

        # Create the logger before the first log call; logging first would
        # fail with AttributeError when no logger has been attached yet.
        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__,
                                                       self.order))
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(
            self.spectrum_id, self.order_id))

        self.npoly = config["cheb_degree"]
        self.ChebyshevSpectrum = ChebyshevSpectrum(self.DataSpectrum,
                                                   self.order_id,
                                                   npoly=self.npoly)
        self.resid_deque = deque(
            maxlen=500
        )  #Deque that stores the last residual spectra, for averaging
        self.counter = 0

        self.Emulator = Emulator.open(
            config["PCA_path"])  # Returns mu and var vectors
        self.Emulator.determine_chunk_log(
            self.wl)  # Truncates the grid to this wl format, power of 2

        pg = self.Emulator.PCAGrid

        self.wl_FFT = pg.wl
        self.ncomp = pg.ncomp

        # Stack mean flux, flux std and the principal components row-wise.
        self.PCOMPS = np.vstack((pg.flux_mean[np.newaxis, :],
                                 pg.flux_std[np.newaxis, :], pg.pcomps))

        self.min_v = self.Emulator.min_v
        self.ss = np.fft.rfftfreq(len(self.wl_FFT), d=self.min_v)
        self.ss[0] = 0.01  # junk so we don't get a divide by zero error

        # Uninitialized holders sized to the number of data points.
        self.pcomps = np.empty((self.ncomp, self.npoints))
        self.flux_mean = np.empty((self.npoints, ))
        self.flux_std = np.empty((self.npoints, ))
        self.mus, self.vars = None, None
        self.C_GP = None
        self.data_mat = None

        self.sigma_matrix = self.sigma**2 * np.eye(self.npoints)

        self.prior = 0.0  # Modified and set by NuisanceSampler.lnprob
        self.nregions = 0
        self.exceptions = []

        #TODO: perturb
        #if args.perturb:
        #perturb(stellar_Starting, config["stellar_jump"], factor=args.perturb)

        # Diagonal jump variances for the Chebyshev coefficients.
        cheb_MH_cov = float(config["cheb_jump"])**2 * np.ones((self.npoly, ))
        cheb_tuple = ("logc0", )
        # add in new coefficients
        for i in range(1, self.npoly):
            cheb_tuple += ("c{}".format(i), )
        # set starting position to 0
        cheb_Starting = {k: 0.0 for k in cheb_tuple}

        # Design cov starting
        cov_Starting = config['cov_params']
        cov_tuple = C.dictkeys_to_cov_global_tuple(cov_Starting)
        cov_MH_cov = np.array(
            [float(config["cov_jump"][key]) for key in cov_tuple])**2

        nuisance_MH_cov = np.diag(np.concatenate((cheb_MH_cov, cov_MH_cov)))
        nuisance_starting = {
            "cheb": cheb_Starting,
            "cov": cov_Starting,
            "regions": {}
        }

        # Because this initialization is happening on the subprocess, I think
        # the random state should be fine.

        # Update the outdir based upon id
        self.noutdir = outdir + "{}/{}/".format(self.spectrum_id, self.order)

        # Create the nuisance parameter sampler to run independently
        self.sampler = NuisanceSampler(OrderModel=self,
                                       starting_param_dict=nuisance_starting,
                                       cov=nuisance_MH_cov,
                                       debug=True,
                                       outdir=self.noutdir,
                                       order=self.order)
        self.p0 = self.sampler.p0

        # Update the nuisance parameters to the starting values so that we at
        # least have a self.data_mat
        self.logger.info(
            "Updating nuisance parameter data products to starting values.")
        self.update_nuisance(nuisance_starting)
        self.lnprob = None
Beispiel #8
0
class Order:
    def __init__(self, debug=False):
        '''
        This object contains all of the variables necessary for the partial
        lnprob calculation for one echelle order. It is designed to first be
        instantiated within the main processes and then forked to other
        subprocesses. Once operating in the subprocess, the variables specific
        to the order are loaded with an `INIT` message call, which tells which key
        to initialize on in the `self.initialize()`.
        '''
        self.lnprob = -np.inf
        self.lnprob_last = -np.inf

        self.func_dict = {"INIT": self.initialize,
                          "DECIDE": self.decide_Theta,
                          "INST": self.instantiate,
                          "LNPROB": self.lnprob_Theta,
                          "GET_LNPROB": self.get_lnprob,
                          "FINISH": self.finish,
                          "SAVE": self.save,
                          "OPTIMIZE_CHEB": self.optimize_Cheb
                          }

        self.debug = debug
        self.logger = logging.getLogger("{}".format(self.__class__.__name__))

    def initialize(self, key):
        '''
        Attach this object to one chunk of data (a single echelle order).

        :param key: (spectrum_id, order_key)
        :param type: (int, int)

        Only call this once all of the subprocesses have been forked.
        '''
        self.id = key
        raw_spectrum_id, self.order_key = self.id
        # The spectrum id is used to index Instruments and DataSpectra, so
        # force it to a plain int.
        self.spectrum_id = int(raw_spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        self.dataSpectrum = DataSpectra[self.spectrum_id]

        # Pull the arrays belonging to this order out of the data spectrum.
        spec, okey = self.dataSpectrum, self.order_key
        self.wl = spec.wls[okey]
        self.fl = spec.fls[okey]
        self.sigma = spec.sigmas[okey]
        self.ndata = len(self.wl)
        self.mask = spec.masks[okey]
        self.order = int(spec.orders[okey])

        # From here on, log under a name that carries the order number.
        logger_name = "{} {}".format(self.__class__.__name__, self.order)
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(logging.DEBUG if self.debug else logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(
            self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(
            self.dataSpectrum, self.order_key, npoly=self.npoly)

        # If a stored parameter file exists, start from its Chebyshev values.
        phi_path = Starfish.specfmt.format(self.spectrum_id,
                                           self.order) + "phi.json"
        if os.path.exists(phi_path):
            self.logger.debug("Loading stored Chebyshev parameters.")
            self.chebyshevSpectrum.update(PhiParam.load(phi_path).cheb)

        # Bounded history of the last residual spectra, kept for averaging.
        self.resid_deque = deque(maxlen=500)
        self.counter = 0

        self.interpolator = Interpolator(self.wl, HDF5Interface())
        self.flux = None  # assigned by update_Theta

        self.wl_FFT = self.interpolator.wl

        # Frequency axis matching an rfft taken over the wl_FFT grid.
        nfft = len(self.wl_FFT)
        self.ss = np.fft.rfftfreq(nfft, d=self.interpolator.interface.dv)
        self.ss[0] = 0.01  # junk so we don't get a divide by zero error

        self.sigma_mat = self.sigma**2 * np.eye(self.ndata)

        self.lnprior = 0.0  # Modified and set by NuisanceSampler.lnprob

        # Update the outdir based upon id
        self.noutdir = Starfish.routdir + "{}/{}/".format(
            self.spectrum_id, self.order)

    def instantiate(self, *args):
        '''
        If mixing Theta and Phi optimization/sampling, perform the sigma clipping
        operation to instantiate covariant regions to cover outliers.

        May involve creating a new NuisanceSampler.
        '''
        raise NotImplementedError

    def get_lnprob(self, *args):
        '''
        Return the *current* value of lnprob.

        Intended to be called from the master process to
        query the child processes for their current value of lnprob.
        '''
        return self.lnprob

    def lnprob_Theta(self, p):
        '''
        Update the model to the Theta parameters and then evaluate the lnprob.

        Intended to be called from the master process via the command "LNPROB".
        '''
        try:
            self.update_Theta(p)
            lnp = self.evaluate() # Also sets self.lnprob to new value
            return lnp
        except (C.ModelError, C.InterpolationError):
            self.logger.debug("ModelError in stellar parameters, sending back -np.inf {}".format(p))
            return -np.inf

    def evaluate(self):
        '''
        Return the lnprob using the current version of the C_GP matrix, data matrix,
        and other intermediate products.
        '''

        self.lnprob_last = self.lnprob

        CC = self.data_mat

        model = self.chebyshevSpectrum.k * self.flux

        try:

            factor, flag = cho_factor(CC)

            R = self.fl - model

            logdet = np.sum(2 * np.log((np.diag(factor))))
            self.lnprob = -0.5 * (np.dot(R, cho_solve((factor, flag), R)) + logdet)

            self.logger.debug("Evaluating lnprob={}".format(self.lnprob))
            return self.lnprob

        # To give us some debugging information about what went wrong.
        except np.linalg.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            raise

    def update_Theta(self, p):
        '''
        Update the model flux (`self.flux`) to the current Theta parameters.

        The steps are: interpolate a raw spectrum at `p.grid`, optionally
        apply a rotational broadening taper in Fourier space, resample onto
        the data wavelengths through a Doppler-shifted copy of `self.wl_FFT`,
        and scale by `10**p.logOmega`. The previous flux is kept in
        `self.flux_last` so that `revert_Theta` can restore it.

        :param p: parameters to update model to; the attributes read here are
            `grid`, `vz`, `vsini` and `logOmega`.
        :type p: model.ThetaParam

        :raises C.ModelError: if `p.vsini` is negative.
        :raises RuntimeError: if the data wavelengths are not contained in the
            shifted wavelength grid.
        '''

        # Dirty hack: when the config provides "fix_logg", the second grid
        # entry of the proposal is overwritten in place, so the caller's `p`
        # is modified as well.
        fix_logg = Starfish.config.get("fix_logg", None)
        if fix_logg is not None:
            p.grid[1] = fix_logg
        print("grid pars are", p.grid)

        self.logger.debug("Updating Theta parameters to {}".format(p))

        # Store the current accepted values before overwriting with new proposed values.
        self.flux_last = self.flux

        # Local, shifted copy of wavelengths: every wavelength is multiplied
        # by sqrt((c + vz) / (c - vz)); C.c_kms is presumably the speed of
        # light in km/s (TODO confirm against the constants module).
        wl_FFT = self.wl_FFT * np.sqrt((C.c_kms + p.vz) / (C.c_kms - p.vz))

        flux_raw = self.interpolator(p.grid)

        # If vsini is less than 0.2 km/s, we might run into issues with
        # the grid spacing. Therefore skip the convolution step if we have
        # values smaller than this.
        # FFT and convolve operations
        # NOTE: only negative values are rejected; vsini == 0 falls into the
        # "skip the convolution" branch despite the wording of the message.
        if p.vsini < 0.0:
            raise C.ModelError("vsini must be positive")
        elif p.vsini < 0.2:
            # Skip the vsini taper due to instrumental effects
            flux_taper = flux_raw
        else:
            FF = np.fft.rfft(flux_raw)

            # Determine the stellar broadening kernel, evaluated on the
            # frequency axis self.ss
            ub = 2. * np.pi * p.vsini * self.ss
            sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub ** 2) + 3. * np.sin(ub) / (2 * ub ** 3)
            # set zeroth frequency to 1 separately (DC term); self.ss[0] holds
            # a placeholder value, so sb[0] computed above is not meaningful
            sb[0] = 1.

            # institute vsini taper
            FF_tap = FF * sb

            # do ifft, back onto a grid with len(self.wl_FFT) samples
            flux_taper = np.fft.irfft(FF_tap, len(self.wl_FFT))

        # Spectrum resample operations: the data wavelengths must lie inside
        # the shifted grid before the spline below is evaluated on them.
        if min(self.wl) < min(wl_FFT) or max(self.wl) > max(wl_FFT):
            raise RuntimeError("Data wl grid ({:.2f},{:.2f}) must fit within the range of wl_FFT ({:.2f},{:.2f})".format(min(self.wl), max(self.wl), min(wl_FFT), max(wl_FFT)))

        # Fit a degree-5 spline through the (shifted wavelength, flux) pairs
        # and evaluate it at the data wavelengths.
        interp = InterpolatedUnivariateSpline(wl_FFT, flux_taper, k=5)
        self.flux = interp(self.wl)
        del interp

        # Force a garbage collection pass after dropping the spline object.
        gc.collect()

        # Scale the resampled flux in place by Omega = 10**logOmega
        Omega = 10**p.logOmega
        self.flux *= Omega

    def revert_Theta(self):
        '''
        Revert the status of the model from a rejected Theta proposal.
        '''

        self.logger.debug("Reverting Theta parameters")

        self.lnprob = self.lnprob_last

        self.flux = self.flux_last

    def decide_Theta(self, yes):
        '''
        Interpret the decision from the master process to either revert the
        Theta model (rejected parameters) or move on (accepted parameters).

        :param yes: if True, accept stellar parameters.
        :type yes: boolean
        '''
        if yes:
            # accept and move on
            self.logger.debug("Deciding to accept Theta parameters")
        else:
            # revert and move on
            self.logger.debug("Deciding to revert Theta parameters")
            self.revert_Theta()

        # Proceed with independent sampling
        self.independent_sample(1)

    def optimize_Cheb(self, *args):
        '''
        Keeping the current Theta parameters fixed, optimize the Chebyshev
        parameters with a downhill simplex search and save the result.

        The objective is `-self.evaluate()`, so `self.data_mat` and
        `self.flux` must already be set. The best-fit coefficients are written
        out through `PhiParam.save()`. Any arguments are ignored.
        '''

        # c0 stays fixed only when that was requested AND there is more than
        # one order; with a single order every coefficient is free.
        # This is necessary if we want to update just a single order.
        # (`and` is required here: `a & b > 1` parses as `(a & b) > 1`, which
        # is never true when `a` is a boolean.)
        if self.chebyshevSpectrum.fix_c0 and len(self.dataSpectrum.wls) > 1:
            p0 = np.zeros((self.npoly - 1))
        else:
            self.chebyshevSpectrum.fix_c0 = False
            p0 = np.zeros((self.npoly))

        def fprob(p):
            # Objective for the minimizer: negative lnprob at coefficients p.
            self.chebyshevSpectrum.update(p)
            lnp = self.evaluate()
            print(self.order, p, lnp)
            if lnp == -np.inf:
                # Large finite penalty so the simplex can keep comparing values.
                return 1e99
            else:
                return -lnp

        from scipy.optimize import fmin
        result = fmin(fprob, p0, maxiter=10000, maxfun=10000)
        print(self.order, result)

        # Due to a JSON bug, np.int64 type objects will get read twice,
        # and cause this routine to fail. Therefore we have to be careful
        # to convert these to ints.
        phi = PhiParam(spectrum_id=int(self.spectrum_id), order=int(self.order), fix_c0=self.chebyshevSpectrum.fix_c0, cheb=result)
        phi.save()

    def update_Phi(self, p):
        '''
        Update the Phi parameters and data covariance matrix.

        :param params: large dictionary containing cheb, cov, and regions
        '''

        raise NotImplementedError

    def revert_Phi(self, *args):
        '''
        Revert all products from the nuisance parameters, including the data
        covariance matrix.
        '''

        self.logger.debug("Reverting Phi parameters")

        self.lnprob = self.lnprob_last

        self.chebyshevSpectrum.revert()
        self.data_mat = self.data_mat_last

    def clear_resid_deque(self):
        '''
        Clear the accumulated residual spectra.
        '''
        self.resid_deque.clear()

    def independent_sample(self, niter):
        '''
        Do the independent sampling specific to this echelle order, using the
        attached self.sampler (NuisanceSampler).

        :param niter: number of iterations to complete before returning to master process.

        '''

        self.logger.debug("Beginning independent sampling on Phi parameters")

        if self.lnprob:
            # If we have a current value, pass it to the sampler
            self.p0, self.lnprob, state = self.sampler.run_mcmc(pos0=self.p0, N=niter, lnprob0=self.lnprob)
        else:
            # Otherwise, start from the beginning
            self.p0, self.lnprob, state = self.sampler.run_mcmc(pos0=self.p0, N=niter)

        self.logger.debug("Finished independent sampling on Phi parameters")
        # Don't return anything to the master process.

    def finish(self, *args):
        '''
        Wrap up the sampling and write the samples to disk.
        '''
        self.logger.debug("Finishing")

    def brain(self, conn):
        '''
        The infinite loop of the subprocess, which continues to listen for
        messages on the pipe.
        '''
        self.conn = conn
        alive = True
        while alive:
            #Keep listening for messages put on the Pipe
            alive = self.interpret()
            #Once self.interpret() returns `False`, this loop will die.
        self.conn.send("DEAD")

    def interpret(self):
        '''
        Interpret the messages being put into the Pipe, and do something with
        them. Messages are always sent in a 2-arg tuple (fname, arg)
        Right now we only expect one function and one argument but this could
        be generalized to **args.
        '''
        #info("brain")

        fname, arg = self.conn.recv() # Waits here to receive a new message
        self.logger.debug("{} received message {}".format(os.getpid(), (fname, arg)))

        func = self.func_dict.get(fname, False)
        if func:
            response = func(arg)
        else:
            self.logger.info("Given an unknown function {}, assuming kill signal.".format(fname))
            return False

        # Functions only return a response other than None when they want them
        # communicated back to the master process.
        # Some commands sent to the child processes do not require a response
        # to the main process.
        if response:
            self.logger.debug("{} sending back {}".format(os.getpid(), response))
            self.conn.send(response)
        return True

    def save(self, *args):
        '''
        Using the current values for flux, write out the data, model, and
        residuals (data minus `self.flux`) into a JSON file.

        The file name is `Starfish.specfmt` formatted with the spectrum id and
        order, followed by "spec.json". Any arguments are ignored.
        '''

        resid = self.fl - self.flux

        my_dict = {"wl":self.wl.tolist(), "data":self.fl.tolist(), "model":self.flux.tolist(), "resid":resid.tolist(), "sigma":self.sigma.tolist(), "spectrum_id":self.spectrum_id, "order":self.order}

        fname = Starfish.specfmt.format(self.spectrum_id, self.order)
        # The context manager closes the file even if serialization fails.
        with open(fname + "spec.json", 'w') as f:
            json.dump(my_dict, f, indent=2, sort_keys=True)
Beispiel #9
0
class OrderModel:
    def __init__(self, debug=False):
        '''
        This object contains all of the variables necessary for the partial
        lnprob calculation for one echelle order. It is designed to first be
        instantiated within the main processes and then forked to other
        subprocesses. Once operating in the subprocess, the variables specific
        to the order are loaded with an `INIT` message call, which tells which key
        to initialize on in the `self.initialize()`.
        '''
        self.lnprob = -np.inf
        self.lnprob_last = -np.inf

        self.func_dict = {
            "INIT": self.initialize,
            "DECIDE": self.decide_stellar,
            "INST": self.instantiate,
            "LNPROB": self.stellar_lnprob,
            "GET_LNPROB": self.get_lnprob,
            "FINISH": self.finish
        }

        self.debug = debug

    def initialize(self, key):
        '''
        Initialize the OrderModel to the correct chunk of data (echelle order).

        Loads the arrays for this order from the module-level `Instruments`
        and `DataSpectra` containers, opens the emulator, allocates the holders
        that `update_stellar` later fills in place, builds the first
        `NuisanceSampler`, and computes an initial `self.data_mat`.

        :param key: (spectrum_id, order_id)
        :param type: (int, int)

        This should only be called after all subprocess have been forked.
        '''

        self.id = key
        self.spectrum_id, self.order_id = self.id

        self.instrument = Instruments[self.spectrum_id]
        self.DataSpectrum = DataSpectra[self.spectrum_id]
        self.wl = self.DataSpectrum.wls[self.order_id]
        self.fl = self.DataSpectrum.fls[self.order_id]
        self.sigma = self.DataSpectrum.sigmas[self.order_id]
        self.npoints = len(self.wl)
        self.mask = self.DataSpectrum.masks[self.order_id]
        self.order = self.DataSpectrum.orders[self.order_id]

        # Create the per-order logger *before* the first log call; it needs
        # self.order for its name, so it cannot be built any earlier.
        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__,
                                                       self.order))
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(
            self.spectrum_id, self.order_id))

        self.npoly = config["cheb_degree"]
        self.ChebyshevSpectrum = ChebyshevSpectrum(self.DataSpectrum,
                                                   self.order_id,
                                                   npoly=self.npoly)
        self.resid_deque = deque(
            maxlen=500
        )  #Deque that stores the last residual spectra, for averaging
        self.counter = 0

        self.Emulator = Emulator.open(
            config["PCA_path"])  # Returns mu and var vectors
        self.Emulator.determine_chunk_log(
            self.wl)  # Truncates the grid to this wl format, power of 2

        pg = self.Emulator.PCAGrid

        self.wl_FFT = pg.wl
        self.ncomp = pg.ncomp

        # Stack mean, std and the principal components so that all of them go
        # through the same FFT/convolve/resample pipeline in update_stellar.
        self.PCOMPS = np.vstack((pg.flux_mean[np.newaxis, :],
                                 pg.flux_std[np.newaxis, :], pg.pcomps))

        self.min_v = self.Emulator.min_v
        self.ss = np.fft.rfftfreq(len(self.wl_FFT), d=self.min_v)
        self.ss[0] = 0.01  # junk so we don't get a divide by zero error

        # Holders for the convolved and resampled products (filled in place)
        self.pcomps = np.empty((self.ncomp, self.npoints))
        self.flux_mean = np.empty((self.npoints, ))
        self.flux_std = np.empty((self.npoints, ))
        self.mus, self.vars = None, None
        self.C_GP = None
        self.data_mat = None

        self.sigma_matrix = self.sigma**2 * np.eye(self.npoints)

        self.prior = 0.0  # Modified and set by NuisanceSampler.lnprob
        self.nregions = 0
        self.exceptions = []

        #TODO: perturb
        #if args.perturb:
        #perturb(stellar_Starting, config["stellar_jump"], factor=args.perturb)

        cheb_MH_cov = float(config["cheb_jump"])**2 * np.ones((self.npoly, ))
        cheb_tuple = ("logc0", )
        # add in new coefficients
        for i in range(1, self.npoly):
            cheb_tuple += ("c{}".format(i), )
        # set starting position to 0
        cheb_Starting = {k: 0.0 for k in cheb_tuple}

        # Design cov starting
        cov_Starting = config['cov_params']
        cov_tuple = C.dictkeys_to_cov_global_tuple(cov_Starting)
        cov_MH_cov = np.array(
            [float(config["cov_jump"][key]) for key in cov_tuple])**2

        nuisance_MH_cov = np.diag(np.concatenate((cheb_MH_cov, cov_MH_cov)))
        nuisance_starting = {
            "cheb": cheb_Starting,
            "cov": cov_Starting,
            "regions": {}
        }

        # Because this initialization is happening on the subprocess, I think
        # the random state should be fine.

        # Update the outdir based upon id
        self.noutdir = outdir + "{}/{}/".format(self.spectrum_id, self.order)

        # Create the nuisance parameter sampler to run independently
        self.sampler = NuisanceSampler(OrderModel=self,
                                       starting_param_dict=nuisance_starting,
                                       cov=nuisance_MH_cov,
                                       debug=True,
                                       outdir=self.noutdir,
                                       order=self.order)
        self.p0 = self.sampler.p0

        # Update the nuisance parameters to the starting values so that we at
        # least have a self.data_mat
        self.logger.info(
            "Updating nuisance parameter data products to starting values.")
        self.update_nuisance(nuisance_starting)
        # No lnprob has been evaluated yet; independent_sample checks for this.
        self.lnprob = None

    def instantiate(self, *args):
        '''
        Clear the old NuisanceSampler, instantiate the regions using the stored
        residual spectra, and create a new NuisanceSampler.

        The stored residuals are averaged and sigma-clipped; every clipped
        pixel, visited in order of decreasing absolute residual, seeds a new
        region unless it is already covered by an earlier region or sits at
        the very edge of the order.

        :param args: ignored.
        :raises RuntimeError: if no residual spectra have been stored yet.
        '''

        if len(self.resid_deque) == 0:
            raise RuntimeError("No residual spectra stored yet.")

        # threshold for sigma clipping
        clip_threshold = config["sigma_clip"]

        # array that specifies if a pixel is already covered.
        # to start, it should be all False
        covered = np.zeros((self.npoints, ), dtype='bool')

        #average all of the spectra in the deque together
        residuals = np.average(np.array(self.resid_deque), axis=0)

        # run the sigma_clip algorithm until converged, and we've identified the outliers
        filtered_data = sigma_clip(residuals, sig=clip_threshold, iters=None)
        mask = filtered_data.mask
        wl = self.wl
        # Loop-invariant edges of the order
        wl_min, wl_max = np.min(wl), np.max(wl)

        sigma0 = config['region_priors']['sigma0']
        logAmp = config["region_params"]["logAmp"]
        sigma = config["region_params"]["sigma"]

        # Sort in decreasing strength of residual
        self.nregions = 0
        regions = {}
        region_mus = []  # for evaluating the mu prior
        for w, _ in sorted(zip(wl[mask], np.abs(residuals[mask])),
                           key=itemgetter(1),
                           reverse=True):
            if w in wl[covered]:
                continue
            # check to make sure region is not *right* at the edge of the echelle order
            if w <= wl_min or w >= wl_max:
                continue

            # instantiate region with default amp and sigma values
            regions[self.nregions] = {
                "logAmp": logAmp,
                "sigma": sigma,
                "mu": w
            }
            region_mus.append(w)
            self.nregions += 1

            # determine the stretch of wl covered by this new region and
            # update the covered pixels
            covered = covered | ((wl >= (w - sigma0)) & (wl <= (w + sigma0)))

        # Take the current nuisance positions as a starting point, and add the regions
        starting_dict = self.sampler.params.copy()
        starting_dict["regions"] = regions

        region_mus = np.array(region_mus)

        # Setup the priors. Work on a copy so that the shared, module-level
        # config dictionary is not modified as a side effect.
        region_priors = dict(config["region_priors"])
        region_priors["mus"] = region_mus
        prior_params = {"regions": region_priors}

        # Rebuild the proposal covariance, now including the region parameters
        cheb_MH_cov = float(config["cheb_jump"])**2 * np.ones((self.npoly, ))
        cov_MH_cov = np.array([
            float(config["cov_jump"][key]) for key in self.sampler.cov_tup
        ])**2
        region_MH_cov = [
            float(config["region_jump"][key])**2
            for key in C.cov_region_parameters
        ]
        regions_MH_cov = np.array(
            [region_MH_cov for i in range(self.nregions)]).flatten()

        nuisance_MH_cov = np.diag(
            np.concatenate((cheb_MH_cov, cov_MH_cov, regions_MH_cov)))

        print(starting_dict)
        print("cov shape {}".format(nuisance_MH_cov.shape))

        # Initialize a new sampler, replacing the old one
        self.sampler = NuisanceSampler(OrderModel=self,
                                       starting_param_dict=starting_dict,
                                       cov=nuisance_MH_cov,
                                       debug=True,
                                       outdir=self.noutdir,
                                       prior_params=prior_params,
                                       order=self.order)

        self.p0 = self.sampler.p0

        # Update the nuisance parameters to the starting values so that we at least have a self.data_mat
        print("Updating nuisance parameter data products to starting values.")
        self.update_nuisance(starting_dict)
        self.lnprob = self.evaluate()

        # To speed up convergence, try just doing a bunch of nuisance runs before
        # going into the iteration pattern
        print("Doing nuisance burn-in for {} samples".format(
            config["nuisance_burn"]))
        self.independent_sample(config["nuisance_burn"])

    def get_lnprob(self, *args):
        '''
        Return the *current* value of lnprob.

        Intended to be called from the master process (StellarSampler.sample), to
        query the child processes for their current value of lnprob.
        '''
        return self.lnprob

    def stellar_lnprob(self, params):
        '''
        Update the model to the parameters and then evaluate the lnprob.

        Intended to be called from the master process via the command "LNPROB".
        '''

        try:
            self.update_stellar(params)
            lnp = self.evaluate()  # Also sets self.lnprob to new value
            return lnp
        except C.ModelError:
            self.logger.debug(
                "ModelError in stellar parameters, sending back -np.inf {}".
                format(params))
            return -np.inf

    def evaluate(self):
        '''
        Return the lnprob using the current version of the DataCovariance matrix
        and other intermediate products.
        '''
        self.lnprob_last = self.lnprob

        X = (self.ChebyshevSpectrum.k * self.flux_std *
             np.eye(self.npoints)).dot(self.pcomps.T)

        CC = X.dot(self.C_GP.dot(X.T)) + self.data_mat

        R = self.fl - self.ChebyshevSpectrum.k * self.flux_mean - X.dot(
            self.mus)

        try:
            factor, flag = cho_factor(CC)
        except np.linalg.LinAlgError as e:
            self.logger.debug("self.sampler.params are {}".format(
                self.sampler.params))
            raise C.ModelError("Can't Cholesky factor {}".format(e))

        logdet = np.sum(2 * np.log((np.diag(factor))))

        self.lnprob = -0.5 * (np.dot(R, cho_solve(
            (factor, flag), R)) + logdet) + self.prior

        if self.counter % 100 == 0:
            self.resid_deque.append(R)

        self.counter += 1

        return self.lnprob

    def revert_stellar(self):
        '''
        Revert the status of the model from a rejected stellar proposal.
        '''

        self.logger.debug("Reverting stellar parameters")

        self.lnprob = self.lnprob_last

        self.flux_mean = self.flux_mean_last
        self.flux_std = self.flux_std_last
        self.pcomps = self.pcomps_last

        self.mus, self.vars = self.mus_last, self.vars_last
        self.C_GP = self.C_GP_last

    def update_stellar(self, params):
        '''
        Update the model to the current stellar parameters.

        The stacked spectra in `self.PCOMPS` are convolved with the rotational
        broadening kernel in Fourier space, resampled onto the data wavelength
        grid, scaled by Omega, and the emulator is queried for new `mus` and
        `vars`. The previously accepted products are saved in `*_last` so that
        `revert_stellar` can restore them.

        :param params: mapping read for the keys "vz", "vsini", "logOmega",
            "temp", "logg" and "Z".
        :raises C.ModelError: if vsini is below 0.2 (the emulator may also
            raise it for parameters outside the grid).
        :raises RuntimeError: if the data wavelengths fall outside the shifted
            model wavelength grid.
        '''

        self.logger.debug("Updating stellar parameters to {}".format(params))

        # Store the current accepted values before overwriting with new proposed values.
        # flux_mean, flux_std and pcomps are overwritten *in place* further
        # down (lres[:] = ...), so they must be copied; keeping a bare
        # reference would make the backup change together with the live array
        # and revert_stellar would restore nothing.
        self.flux_mean_last = self.flux_mean.copy()
        self.flux_std_last = self.flux_std.copy()
        self.pcomps_last = self.pcomps.copy()
        # These are rebound to new objects below, so references suffice.
        self.mus_last, self.vars_last = self.mus, self.vars
        self.C_GP_last = self.C_GP

        #TODO: Possible speedups:
        # 1. Store the PCOMPS pre-FFT'd

        # Shift the velocity
        vz = params["vz"]
        # Local, shifted copy
        wl_FFT = self.wl_FFT * np.sqrt((C.c_kms + vz) / (C.c_kms - vz))

        # FFT and convolve operations
        vsini = params["vsini"]

        # NOTE(review): the threshold is 0.2 although the message says
        # "positive"; the message is left unchanged here.
        if vsini < 0.2:
            raise C.ModelError("vsini must be positive")

        FF = np.fft.rfft(self.PCOMPS, axis=1)

        # Determine the stellar broadening kernel
        ub = 2. * np.pi * vsini * self.ss
        sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub**2) + 3. * np.sin(ub) / (
            2 * ub**3)
        # set zeroth frequency to 1 separately (DC term)
        sb[0] = 1.

        # institute velocity and instrumental taper
        FF_tap = FF * sb

        # do ifft
        pcomps_full = np.fft.irfft(FF_tap, len(wl_FFT), axis=1)

        # Spectrum resample operations
        if min(self.wl) < min(wl_FFT) or max(self.wl) > max(wl_FFT):
            raise RuntimeError(
                "Data wl grid ({:.2f},{:.2f}) must fit within the range of wl_FFT ({"
                ":.2f},{:.2f})".format(min(self.wl), max(self.wl), min(wl_FFT),
                                       max(wl_FFT)))

        # Take the output from the FFT operation (pcomps_full), and stuff them
        # into respective data products
        for lres, hres in zip(
                chain([self.flux_mean, self.flux_std], self.pcomps),
                pcomps_full):
            interp = InterpolatedUnivariateSpline(wl_FFT, hres, k=5)
            lres[:] = interp(self.wl)
            del interp

        gc.collect()

        # Adjust flux_mean and flux_std by Omega
        Omega = 10**params["logOmega"]
        self.flux_mean *= Omega
        self.flux_std *= Omega

        # Now update the parameters from the emulator
        pars = np.array([params["temp"], params["logg"], params["Z"]])

        # If pars are outside the grid, Emulator will raise C.ModelError
        self.mus, self.vars = self.Emulator(pars)

        self.C_GP = self.vars * np.eye(self.ncomp)

    def decide_stellar(self, yes):
        '''
        Interpret the decision from the master process to either revert the
        stellar model (rejected parameters) or move on (accepted parameters).
        '''
        if yes:
            # accept and move on
            self.logger.debug("Deciding to accept stellar parameters")
        else:
            # revert and move on
            self.logger.debug("Deciding to revert stellar parameters")
            self.revert_stellar()

        # Proceed with independent sampling
        self.independent_sample(1)

    def update_nuisance(self, params):
        '''
        Apply a new set of nuisance parameters: refresh the Chebyshev
        coefficients and rebuild the data covariance matrix, keeping the
        previous matrix in `self.data_mat_last`.

        :param params: large dictionary containing cheb, cov, and regions
        :raises C.ModelError: if the global `sigAmp` is below 0.1.
        '''

        self.logger.debug("Updating nuisance parameters to {}".format(params))

        # Chebyshev part
        self.ChebyshevSpectrum.update(params["cheb"])

        # Global covariance part
        length_scale = params["cov"]["l"]
        sigAmp = params["cov"]["sigAmp"]

        # Sanity check on the global covariance parameters
        if sigAmp < 0.1:
            raise C.ModelError(
                "sigAmp shouldn't be lower than 0.1, something is wrong.")

        # Cut-off distance handed to get_dense_C
        max_r = 6.0 * length_scale  # [km/s]

        # If any region reaches further than the global length, use that instead
        if self.nregions > 0:
            regions = params["regions"]
            region_sigmas = np.array(
                [regions[key]["sigma"] for key in sorted(regions)])  #km/s
            max_r = max(max_r, 4.0 * np.max(region_sigmas))

        # Partial function which returns the proper covariance element
        k_func = make_k_func(params)

        # Remember the previous data matrix in case we want to revert later
        self.data_mat_last = self.data_mat
        dense_C = get_dense_C(self.wl, k_func=k_func, max_r=max_r)
        self.data_mat = dense_C + sigAmp * self.sigma_matrix

    def revert_nuisance(self, *args):
        '''
        Revert all products from the nuisance parameters, including the data
        covariance matrix.
        '''

        self.logger.debug("Reverting nuisance parameters")

        self.lnprob = self.lnprob_last

        self.ChebyshevSpectrum.revert()
        self.data_mat = self.data_mat_last

    def clear_resid_deque(self):
        '''
        Clear the accumulated residual spectra.
        '''
        self.resid_deque.clear()

    def independent_sample(self, niter):
        '''
        Do the independent sampling specific to this echelle order, using the
        attached self.sampler (NuisanceSampler).

        :param niter: number of iterations to complete before returning to master process.

        '''

        self.logger.debug(
            "Beginning independent sampling on nuisance parameters")

        if self.lnprob:
            # If we have a current value, pass it to the sampler
            self.p0, self.lnprob, state = self.sampler.run_mcmc(
                pos0=self.p0, N=niter, lnprob0=self.lnprob)
        else:
            # Otherwise, start from the beginning
            self.p0, self.lnprob, state = self.sampler.run_mcmc(pos0=self.p0,
                                                                N=niter)

        self.logger.debug(
            "Finished independent sampling on nuisance parameters")
        # Don't return anything to the master process.

    def finish(self, *args):
        '''
        Wrap up the sampling and write the samples to disk.
        '''

        print(self.sampler.acceptance_fraction)
        print(self.sampler.acor)
        self.sampler.write()
        self.sampler.plot()  # triangle_plot=True
        print("There were {} exceptions.".format(len(self.exceptions)))
        # print out the values of each region key.
        for exception in self.exceptions:
            regions = exception["regions"]
            keys = sorted(regions)
            for key in keys:
                print(regions[key])
            cov = exception["cov"]
            print(cov)
            print("\n\n")

    def brain(self, conn):
        '''
        The infinite loop of the subprocess, which continues to listen for
        messages on the pipe.
        '''
        self.conn = conn
        alive = True
        while alive:
            #Keep listening for messages put on the Pipe
            alive = self.interpret()
            #Once self.interpret() returns `False`, this loop will die.
        self.conn.send("DEAD")

    def interpret(self):
        '''
        Interpret the messages being put into the Pipe, and do something with
        them. Messages are always sent in a 2-arg tuple (fname, arg)
        Right now we only expect one function and one argument but this could
        be generalized to **args.
        '''
        #info("brain")

        fname, arg = self.conn.recv()  # Waits here to receive a new message
        self.logger.debug("{} received message {}".format(
            os.getpid(), (fname, arg)))

        func = self.func_dict.get(fname, False)
        if func:
            response = func(arg)
        else:
            self.logger.info(
                "Given an unknown function {}, assuming kill signal.".format(
                    fname))
            return False

        # Functions only return a response other than None when they want them
        # communicated back to the master process.
        # Some commands sent to the child processes do not require a response
        # to the main process.
        if response:
            self.logger.debug("{} sending back {}".format(
                os.getpid(), response))
            self.conn.send(response)
        return True
# sigma**2 multiplied into an ndata x ndata identity matrix
# (sigma and ndata are defined outside this span).
sigma_mat = sigma**2 * np.eye(ndata)
# Placeholders; mus and C_GP are overwritten from the emulator just below.
mus, C_GP, data_mat = None, None, None

# For each star

# In the config file, list the asteroseismic parameters as the starting grid parameters.
# They are read here into a plain NumPy array.
grid = np.array(Starfish.config["Theta"]["grid"])
# Now update the parameters for the emulator
# If pars are outside the grid, Emulator will raise C.ModelError
emulator.params = grid
mus, C_GP = emulator.matrix

# Chebyshev spectrum for order key 0, started from the "chebs" config values
npoly = Starfish.config["cheb_degree"]
chebyshevSpectrum = ChebyshevSpectrum(dataSpec, 0, npoly=npoly)
chebyshevSpectrum.update(np.array(Starfish.config["chebs"]))


def lnprob(p):
    '''
    Begin the lnprob calculation for the parameter vector `p`.

    The first three entries of `p` are read as (vz, vsini, logOmega) and the
    remaining entries as Chebyshev coefficients, which are pushed into the
    module-level `chebyshevSpectrum`.

    NOTE(review): only the setup portion is present in this span (wavelength
    shift and allocation of empty holders); nothing is computed from vsini or
    logOmega and nothing is returned, so the body looks truncated -- confirm
    against the original source.
    '''
    vz, vsini, logOmega = p[:3]
    cheb = p[3:]

    chebyshevSpectrum.update(cheb)

    # Local, shifted copy of wavelengths
    wl_FFT = wl_FFT_orig * np.sqrt((C.c_kms + vz) / (C.c_kms - vz))

    # Holders to store the convolved and resampled eigenspectra
    # (np.empty leaves them uninitialised)
    eigenspectra = np.empty((pca.m, ndata))
    flux_mean = np.empty((ndata, ))
Beispiel #11
0
class Order:
    def __init__(self, debug=False):
        '''
        This object contains all of the variables necessary for the partial
        lnprob calculation for one echelle order. It is designed to first be
        instantiated within the main processes and then forked to other
        subprocesses. Once operating in the subprocess, the variables specific
        to the order are loaded with an `INIT` message call, which tells which key
        to initialize on in the `self.initialize()`.
        '''
        self.lnprob = -np.inf
        self.lnprob_last = -np.inf

        self.func_dict = {"INIT": self.initialize,
                          "DECIDE": self.decide_Theta,
                          "INST": self.instantiate,
                          "LNPROB": self.lnprob_Theta,
                          "GET_LNPROB": self.get_lnprob,
                          "FINISH": self.finish,
                          "SAVE": self.save,
                          "OPTIMIZE_CHEB": self.optimize_Cheb
                          }

        self.debug = debug
        self.logger = logging.getLogger("{}".format(self.__class__.__name__))

    def initialize(self, key):
        '''
        Bind this object to one chunk of data (echelle order).

        Pulls the arrays for the requested order out of the module-level
        `Instruments` and `DataSpectra` containers, sets up the order-specific
        logger, the Chebyshev spectrum (optionally seeded from a stored
        "phi.json" file), the emulator, and the holders that `update_Theta`
        fills in place.

        :param key: (spectrum_id, order_key)
        :param type: (int, int)

        This method should only be called after all subprocess have been forked.
        '''

        self.id = key
        raw_spectrum_id, order_key = self.id
        self.order_key = order_key
        # Make sure these are ints
        self.spectrum_id = int(raw_spectrum_id)

        self.instrument = Instruments[self.spectrum_id]
        spectrum = DataSpectra[self.spectrum_id]
        self.dataSpectrum = spectrum
        self.wl = spectrum.wls[order_key]
        self.fl = spectrum.fls[order_key]
        self.sigma = spectrum.sigmas[order_key]
        self.ndata = len(self.wl)
        self.mask = spectrum.masks[order_key]
        self.order = int(spectrum.orders[order_key])

        # Order-specific logger, verbosity driven by the debug flag
        logger_name = "{} {}".format(self.__class__.__name__, self.order)
        self.logger = logging.getLogger(logger_name)
        self.logger.setLevel(logging.DEBUG if self.debug else logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_key))

        self.npoly = Starfish.config["cheb_degree"]
        self.chebyshevSpectrum = ChebyshevSpectrum(spectrum, order_key, npoly=self.npoly)

        # If a stored parameter file exists, start from its Chebyshev values
        fname = Starfish.specfmt.format(self.spectrum_id, self.order) + "phi.json"
        if os.path.exists(fname):
            self.logger.debug("Loading stored Chebyshev parameters.")
            phi = PhiParam.load(fname)
            self.chebyshevSpectrum.update(phi.cheb)

        # Stores the last residual spectra (at most 500), for averaging
        self.resid_deque = deque(maxlen=500)
        self.counter = 0

        emulator = Emulator.open()
        self.emulator = emulator
        emulator.determine_chunk_log(self.wl)

        pca = emulator.pca
        self.pca = pca

        self.wl_FFT = pca.wl

        # The raw eigenspectra and mean flux components, stacked row-wise
        mean_row = pca.flux_mean[np.newaxis,:]
        std_row = pca.flux_std[np.newaxis,:]
        self.EIGENSPECTRA = np.vstack((mean_row, std_row, pca.eigenspectra))

        self.ss = np.fft.rfftfreq(pca.npix, d=emulator.dv)
        self.ss[0] = 0.01 # junk so we don't get a divide by zero error

        # Holders to store the convolved and resampled eigenspectra
        ndata = self.ndata
        self.eigenspectra = np.empty((pca.m, ndata))
        self.flux_mean = np.empty((ndata,))
        self.flux_std = np.empty((ndata,))

        self.sigma_mat = self.sigma**2 * np.eye(ndata)
        self.mus = None
        self.C_GP = None
        self.data_mat = None

        self.lnprior = 0.0 # Modified and set by NuisanceSampler.lnprob

        # Update the outdir based upon id
        self.noutdir = Starfish.routdir + "{}/{}/".format(self.spectrum_id, self.order)

    def instantiate(self, *args):
        '''
        Placeholder for region instantiation.

        When Theta and Phi optimization/sampling are mixed, this is where the
        sigma clipping operation would instantiate covariant regions to cover
        outliers, possibly creating a new NuisanceSampler.

        :raises NotImplementedError: always.
        '''
        raise NotImplementedError()

    def get_lnprob(self, *args):
        '''
        Report the lnprob value this order currently holds.

        Meant to be invoked from the master process when it polls the child
        processes for their current lnprob.

        :param args: ignored.
        '''
        current = self.lnprob
        return current

    def lnprob_Theta(self, p):
        '''
        Move the model to the proposed Theta parameters and return the
        resulting lnprob.

        Meant to be triggered by the master process through the "LNPROB"
        command. If updating or evaluating raises C.ModelError, -np.inf is
        returned instead.

        :param p: Theta parameters handed to `update_Theta`.
        '''
        try:
            self.update_Theta(p)
            # evaluate() also stores the new value on self.lnprob
            return self.evaluate()
        except C.ModelError:
            message = "ModelError in stellar parameters, sending back -np.inf {}".format(p)
            self.logger.debug(message)
            return -np.inf

    def evaluate(self):
        '''
        Return the lnprob using the current version of the C_GP matrix, data matrix,
        and other intermediate products.

        The previous value is kept in `self.lnprob_last` and the new one is
        stored on `self.lnprob`.

        :raises numpy.linalg.LinAlgError: re-raised after printing debugging
            information if the covariance matrix cannot be Cholesky factored.
        '''

        self.lnprob_last = self.lnprob

        # Scale each row of eigenspectra.T by k * flux_std. This equals
        # diag(k * flux_std).dot(eigenspectra.T) but avoids materialising a
        # dense ndata x ndata diagonal matrix on every evaluation.
        row_scale = self.chebyshevSpectrum.k * self.flux_std
        X = row_scale[:, np.newaxis] * self.eigenspectra.T

        CC = X.dot(self.C_GP.dot(X.T)) + self.data_mat

        # np.linalg.LinAlgError is the public name of the exception; the
        # np.linalg.linalg module path is private/deprecated in newer NumPy.
        try:
            factor, flag = cho_factor(CC)
        except np.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            self.CC_debugger(CC)
            raise

        try:
            R = self.fl - self.chebyshevSpectrum.k * self.flux_mean - X.dot(self.mus)

            # log|CC| from the diagonal of the Cholesky factor
            logdet = np.sum(2 * np.log((np.diag(factor))))
            self.lnprob = -0.5 * (np.dot(R, cho_solve((factor, flag), R)) + logdet)

            self.logger.debug("Evaluating lnprob={}".format(self.lnprob))
            return self.lnprob

        # To give us some debugging information about what went wrong.
        except np.linalg.LinAlgError:
            print("Spectrum:", self.spectrum_id, "Order:", self.order)
            raise

    def CC_debugger(self, CC):
        '''
        Special debugging information for the covariance matrix decomposition.

        Prints a short diagnosis of `CC`: negative or NaN diagonal entries,
        asymmetry, the number of negative eigenvalues, and the smallest and
        largest eigenvalues (up to ten of each).

        :param CC: square covariance matrix that failed to factor.
        '''
        print('{:-^60}'.format('CC_debugger'))
        print("See https://github.com/iancze/Starfish/issues/26")
        print("Covariance matrix at a glance:")
        diagonal = CC.diagonal()
        if (diagonal.min() < 0.0):
            print("- Negative entries on the diagonal:")
            print("\t- Check sigAmp: should be positive")
            print("\t- Check uncertainty estimates: should all be positive")
        elif np.any(np.isnan(diagonal)):
            print("- Covariance matrix has a NaN value on the diagonal")
        else:
            if not np.allclose(CC, CC.T):
                print("- The covariance matrix is highly asymmetric")

            #Still might have an asymmetric matrix below `allclose` threshold
            evals_CC, _ = np.linalg.eigh(CC)
            n_neg = (evals_CC < 0).sum()
            n_tot = len(evals_CC)
            print("- There are {} negative eigenvalues out of {}.".format(n_neg, n_tot))

            def describe(index):
                # Negative eigenvalues are padded with '>' so they stand out
                value = evals_CC[index]
                fill = '>' if value < 0 else '.'
                return "{: >6} {:{fill}>20.3e}".format(index, value, fill=fill)

            # Never index past the end: small matrices have fewer than ten
            # eigenvalues, and failing here would hide the original error.
            n_show = min(10, n_tot)

            print("Covariance matrix eigenvalues:")
            print(*[describe(i) for i in range(n_show)], sep='\n')
            print('{: >15}'.format('...'))
            print(*[describe(i) for i in range(n_tot - n_show, n_tot)], sep='\n')
        print('{:-^60}'.format('-'))

    def update_Theta(self, p):
        '''
        Bring the model to the given Theta parameters.

        The stacked spectra in `self.EIGENSPECTRA` are (optionally) convolved
        with the rotational broadening kernel in Fourier space, resampled onto
        the data wavelength grid, scaled by Omega, and the emulator is asked
        for new `mus` and `C_GP`. The previously accepted products are saved
        in `*_last` so that `revert_Theta` can restore them.

        :param p: parameters to update model to
        :type p: model.ThetaParam
        :raises C.ModelError: if vsini is negative (the emulator may also
            raise it for parameters outside the grid).
        :raises RuntimeError: if the data wavelengths fall outside the shifted
            model wavelength grid.
        '''

        # HACK to get a fixed logg: when the config provides "fix_logg", the
        # middle grid value is overwritten with it.
        fix_logg = Starfish.config.get("fix_logg", None)
        if fix_logg is not None:
            p.grid[1] = fix_logg
        print("grid pars are", p.grid)

        self.logger.debug("Updating Theta parameters to {}".format(p))

        # Back up the currently accepted products before they are overwritten.
        # The three arrays are modified in place below, hence the copies.
        self.flux_mean_last = self.flux_mean.copy()
        self.flux_std_last = self.flux_std.copy()
        self.eigenspectra_last = self.eigenspectra.copy()
        self.mus_last = self.mus
        self.C_GP_last = self.C_GP

        # Local, shifted copy of wavelengths
        wl_FFT = self.wl_FFT * np.sqrt((C.c_kms + p.vz) / (C.c_kms - p.vz))

        # FFT and convolve operations.
        # Below 0.2 km/s we might run into issues with the grid spacing, so
        # the convolution step is skipped for such small values.
        if p.vsini < 0.0:
            raise C.ModelError("vsini must be positive")

        if p.vsini < 0.2:
            # Skip the vsini taper due to instrumental effects
            eigenspectra_full = self.EIGENSPECTRA.copy()
        else:
            spectra_fft = np.fft.rfft(self.EIGENSPECTRA, axis=1)

            # Stellar broadening kernel
            ub = 2. * np.pi * p.vsini * self.ss
            sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub ** 2) + 3. * np.sin(ub) / (2 * ub ** 3)
            # set zeroth frequency to 1 separately (DC term)
            sb[0] = 1.

            # Apply the vsini taper and transform back
            eigenspectra_full = np.fft.irfft(spectra_fft * sb, self.pca.npix, axis=1)

        # Spectrum resample operations: the data grid must lie inside the model grid
        data_lo, data_hi = min(self.wl), max(self.wl)
        grid_lo, grid_hi = min(wl_FFT), max(wl_FFT)
        if data_lo < grid_lo or data_hi > grid_hi:
            raise RuntimeError("Data wl grid ({:.2f},{:.2f}) must fit within the range of wl_FFT ({:.2f},{:.2f})".format(data_lo, data_hi, grid_lo, grid_hi))

        # Resample each full-resolution row (eigenspectra_full) into its
        # data-grid holder, in the order mean, std, eigenspectra.
        holders = chain([self.flux_mean, self.flux_std], self.eigenspectra)
        for lres, hres in zip(holders, eigenspectra_full):
            lres[:] = InterpolatedUnivariateSpline(wl_FFT, hres, k=5)(self.wl)

        # Helps keep memory usage low, seems like the numpy routine is slow
        # to clear allocated memory for each iteration.
        gc.collect()

        # Adjust flux_mean and flux_std by Omega
        Omega = 10**p.logOmega
        self.flux_mean *= Omega
        self.flux_std *= Omega

        # Now update the parameters from the emulator
        # If pars are outside the grid, Emulator will raise C.ModelError
        self.emulator.params = p.grid
        self.mus, self.C_GP = self.emulator.matrix

    def revert_Theta(self):
        '''
        Roll the model back after a rejected Theta proposal.

        Restores ``lnprob`` and each data product (``flux_mean``, ``flux_std``,
        ``eigenspectra``, ``mus`` and ``C_GP``) from its ``*_last`` counterpart.
        '''

        self.logger.debug("Reverting Theta parameters")

        self.lnprob = self.lnprob_last

        # Each product is restored from the attribute carrying the "_last" suffix.
        for product in ("flux_mean", "flux_std", "eigenspectra", "mus", "C_GP"):
            setattr(self, product, getattr(self, product + "_last"))

    def decide_Theta(self, yes):
        '''
        Apply the master process's verdict on the last Theta proposal, then
        run one iteration of independent sampling.

        :param yes: if True the stellar parameters are accepted and kept;
            otherwise the model is reverted via ``revert_Theta``.
        :type yes: boolean
        '''
        if not yes:
            # Rejected: restore the previously accepted state.
            self.logger.debug("Deciding to revert Theta parameters")
            self.revert_Theta()
        else:
            # Accepted: nothing to undo.
            self.logger.debug("Deciding to accept Theta parameters")

        # Either way, continue with one step of independent sampling.
        self.independent_sample(1)

    def optimize_Cheb(self, *args):
        '''
        Optimize the Chebyshev coefficients with the current Theta parameters
        held fixed (white noise assumed), then save the result as a PhiParam.

        :param args: ignored; accepted so the method can be called with a
            message argument.
        '''
        from scipy.optimize import fmin

        # One fewer free coefficient when c0 is held fixed.
        self.fix_c0 = bool(self.chebyshevSpectrum.fix_c0)
        ncoeff = self.npoly - 1 if self.fix_c0 else self.npoly
        p0 = np.zeros((ncoeff))

        def fprob(p):
            # Objective for the minimizer: negative lnprob at coefficients p.
            self.chebyshevSpectrum.update(p)
            lnp = self.evaluate()
            print(self.order, p, lnp)
            # Replace an infinite penalty with a large finite one.
            return 1e99 if lnp == -np.inf else -lnp

        result = fmin(fprob, p0, maxiter=10000, maxfun=10000)
        print(self.order, result)

        # Due to a JSON bug, np.int64 type objects will get read twice,
        # and cause this routine to fail, so the ids are converted to
        # plain ints before constructing the PhiParam.
        phi = PhiParam(
            spectrum_id=int(self.spectrum_id),
            order=int(self.order),
            fix_c0=self.chebyshevSpectrum.fix_c0,
            cheb=result,
        )
        phi.save()

    def update_Phi(self, p):
        '''
        Placeholder for updating the Phi parameters and the data covariance
        matrix; not implemented in this class.

        :param p: the proposed Phi parameters (currently unused)
        :raises NotImplementedError: always
        '''

        raise NotImplementedError()

    def revert_Phi(self, *args):
        '''
        Undo the last Phi (nuisance parameter) update: restore ``lnprob``,
        revert the Chebyshev spectrum and restore the data covariance matrix.

        :param args: ignored
        '''

        self.logger.debug("Reverting Phi parameters")

        previous_lnprob = self.lnprob_last
        self.lnprob = previous_lnprob

        # The Chebyshev object performs its own revert.
        self.chebyshevSpectrum.revert()

        previous_data_mat = self.data_mat_last
        self.data_mat = previous_data_mat

    def clear_resid_deque(self):
        '''
        Empty ``self.resid_deque``, discarding the residual spectra stored so far.
        '''
        stored_residuals = self.resid_deque
        stored_residuals.clear()

    def independent_sample(self, niter):
        '''
        Do the independent sampling specific to this echelle order, using the
        attached self.sampler (NuisanceSampler).

        The sampler's returned position and lnprob replace ``self.p0`` and
        ``self.lnprob``. Nothing is returned to the master process.

        :param niter: number of iterations to complete before returning to master process.
        '''

        self.logger.debug("Beginning independent sampling on Phi parameters")

        run_kwargs = {"pos0": self.p0, "N": niter}
        # Compare against None rather than relying on truthiness: an lnprob of
        # exactly 0.0 is a valid current value and must still be handed to the
        # sampler instead of being treated as "no value yet".
        if self.lnprob is not None:
            run_kwargs["lnprob0"] = self.lnprob

        self.p0, self.lnprob, _ = self.sampler.run_mcmc(**run_kwargs)

        self.logger.debug("Finished independent sampling on Phi parameters")
        # Don't return anything to the master process.

    def finish(self, *args):
        '''
        Mark the end of sampling for this order. This version only emits a
        debug log message.

        :param args: ignored
        '''
        log_debug = self.logger.debug
        log_debug("Finishing")

    def brain(self, conn):
        '''
        Main loop of the subprocess: keep handling messages from the pipe
        until ``self.interpret()`` returns a false value, then send "DEAD".

        :param conn: the connection to listen on; stored as ``self.conn``.
        '''
        self.conn = conn

        # Each call to interpret() handles one message; a false return value
        # ends the loop.
        while self.interpret():
            pass

        self.conn.send("DEAD")

    def interpret(self):
        '''
        Receive one message from the Pipe and dispatch it.

        Messages are always sent in a 2-arg tuple (fname, arg). ``fname`` is
        looked up in ``self.func_dict`` and the matching function is called
        with ``arg``. Right now we only expect one function and one argument
        but this could be generalized to **args.

        :returns: True to keep listening, False when ``fname`` is unknown
            (treated as a kill signal).
        '''

        fname, arg = self.conn.recv() # Waits here to receive a new message
        self.logger.debug("{} received message {}".format(os.getpid(), (fname, arg)))

        func = self.func_dict.get(fname)
        if func is None:
            self.logger.info("Given an unknown function {}, assuming kill signal.".format(fname))
            return False

        response = func(arg)

        # Functions only return a response other than None when they want it
        # communicated back to the master process. Test against None
        # explicitly: a truthiness test would silently drop a legitimate
        # response such as an lnprob of 0.0 (leaving the master waiting on the
        # pipe) and would raise on a multi-element array.
        if response is not None:
            self.logger.debug("{} sending back {}".format(os.getpid(), response))
            self.conn.send(response)
        return True

    def save(self, *args):
        '''
        Using the current values for flux, write out the data, mean model, and mean
        residuals into a JSON.

        The output path is ``Starfish.specfmt`` formatted with the spectrum id
        and order, followed by "spec.json".

        :param args: ignored
        '''

        X = (self.chebyshevSpectrum.k * self.flux_std * np.eye(self.ndata)).dot(self.eigenspectra.T)

        model = self.chebyshevSpectrum.k * self.flux_mean + X.dot(self.mus)
        resid = self.fl - model

        my_dict = {"wl":self.wl.tolist(), "data":self.fl.tolist(), "model":model.tolist(), "resid":resid.tolist(), "sigma":self.sigma.tolist(), "spectrum_id":self.spectrum_id, "order":self.order}

        fname = Starfish.specfmt.format(self.spectrum_id, self.order)
        # The context manager closes the file even if json.dump raises
        # (e.g. on a non-serializable value).
        with open(fname + "spec.json", 'w') as f:
            json.dump(my_dict, f, indent=2, sort_keys=True)
Beispiel #12
0
    def initialize(self, key):
        '''
        Initialize the OrderModel to the correct chunk of data (echelle order).

        Loads the data for the order, creates the logger, the Chebyshev
        spectrum, the emulator and the NuisanceSampler, and evaluates the
        nuisance data products at their starting values.

        :param key: (spectrum_id, order_id)
        :type key: (int, int)

        This should only be called after all subprocess have been forked.
        '''

        self.id = key
        self.spectrum_id, self.order_id = self.id

        self.instrument = Instruments[self.spectrum_id]
        self.DataSpectrum = DataSpectra[self.spectrum_id]
        self.wl = self.DataSpectrum.wls[self.order_id]
        self.fl = self.DataSpectrum.fls[self.order_id]
        self.sigma = self.DataSpectrum.sigmas[self.order_id]
        self.npoints = len(self.wl)
        self.mask = self.DataSpectrum.masks[self.order_id]
        self.order = self.DataSpectrum.orders[self.order_id]

        # Create the logger before the first message is emitted; logging
        # through self.logger before this point fails because the attribute
        # does not exist yet.
        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_id))

        self.npoly = config["cheb_degree"]
        self.ChebyshevSpectrum = ChebyshevSpectrum(self.DataSpectrum, self.order_id, npoly=self.npoly)
        self.resid_deque = deque(maxlen=500) #Deque that stores the last residual spectra, for averaging
        self.counter = 0

        self.Emulator = Emulator.open(config["PCA_path"]) # Returns mu and var vectors
        self.Emulator.determine_chunk_log(self.wl) # Truncates the grid to this wl format, power of 2

        pg = self.Emulator.PCAGrid

        self.wl_FFT = pg.wl
        self.ncomp = pg.ncomp

        # Stack mean flux, flux std and the principal components row-wise so
        # they can be processed together.
        self.PCOMPS = np.vstack((pg.flux_mean[np.newaxis,:], pg.flux_std[np.newaxis,:], pg.pcomps))

        self.min_v = self.Emulator.min_v
        self.ss = np.fft.rfftfreq(len(self.wl_FFT), d=self.min_v)
        self.ss[0] = 0.01 # junk so we don't get a divide by zero error

        # Buffers for the data products at the resolution of the data.
        self.pcomps = np.empty((self.ncomp, self.npoints))
        self.flux_mean = np.empty((self.npoints,))
        self.flux_std = np.empty((self.npoints,))
        self.mus, self.vars = None, None
        self.C_GP = None
        self.data_mat = None

        self.sigma_matrix = self.sigma**2 * np.eye(self.npoints)

        self.prior = 0.0 # Modified and set by NuisanceSampler.lnprob
        self.nregions = 0
        self.exceptions = []

        #TODO: perturb
        #if args.perturb:
            #perturb(stellar_Starting, config["stellar_jump"], factor=args.perturb)

        cheb_MH_cov = float(config["cheb_jump"])**2 * np.ones((self.npoly,))
        cheb_tuple = ("logc0",)
        # add in new coefficients
        for i in range(1, self.npoly):
            cheb_tuple += ("c{}".format(i),)
        # set starting position to 0
        cheb_Starting = {k:0.0 for k in cheb_tuple}

        # Design cov starting
        cov_Starting = config['cov_params']
        cov_tuple = C.dictkeys_to_cov_global_tuple(cov_Starting)
        cov_MH_cov = np.array([float(config["cov_jump"][key]) for key in cov_tuple])**2

        nuisance_MH_cov = np.diag(np.concatenate((cheb_MH_cov, cov_MH_cov)))
        nuisance_starting = {"cheb": cheb_Starting, "cov": cov_Starting, "regions":{}}

        # Because this initialization is happening on the subprocess, I think
        # the random state should be fine.

        # Update the outdir based upon id
        self.noutdir = outdir + "{}/{}/".format(self.spectrum_id, self.order)

        # Create the nuisance parameter sampler to run independently
        self.sampler = NuisanceSampler(OrderModel=self, starting_param_dict=nuisance_starting, cov=nuisance_MH_cov,
                                       debug=True, outdir=self.noutdir, order=self.order)
        self.p0 = self.sampler.p0

        # Update the nuisance parameters to the starting values so that we at
        # least have a self.data_mat
        self.logger.info("Updating nuisance parameter data products to starting values.")
        self.update_nuisance(nuisance_starting)
        # No current lnprob value yet.
        self.lnprob = None
Beispiel #13
0
class OrderModel:
    def __init__(self, debug=False):
        '''
        This object contains all of the variables necessary for the partial
        lnprob calculation for one echelle order. It is designed to first be
        instantiated within the main processes and then forked to other
        subprocesses. Once operating in the subprocess, the variables specific
        to the order are loaded with an `INIT` message call, which tells which key
        to initialize on in the `self.initialize()`.
        '''
        self.lnprob = -np.inf
        self.lnprob_last = -np.inf

        self.func_dict = {"INIT": self.initialize,
                          "DECIDE": self.decide_stellar,
                          "INST": self.instantiate,
                          "LNPROB": self.stellar_lnprob,
                          "GET_LNPROB": self.get_lnprob,
                          "FINISH": self.finish
                          }

        self.debug = debug

    def initialize(self, key):
        '''
        Initialize the OrderModel to the correct chunk of data (echelle order).

        Loads the data for the order, creates the logger, the Chebyshev
        spectrum, the emulator and the NuisanceSampler, and evaluates the
        nuisance data products at their starting values.

        :param key: (spectrum_id, order_id)
        :type key: (int, int)

        This should only be called after all subprocess have been forked.
        '''

        self.id = key
        self.spectrum_id, self.order_id = self.id

        self.instrument = Instruments[self.spectrum_id]
        self.DataSpectrum = DataSpectra[self.spectrum_id]
        self.wl = self.DataSpectrum.wls[self.order_id]
        self.fl = self.DataSpectrum.fls[self.order_id]
        self.sigma = self.DataSpectrum.sigmas[self.order_id]
        self.npoints = len(self.wl)
        self.mask = self.DataSpectrum.masks[self.order_id]
        self.order = self.DataSpectrum.orders[self.order_id]

        # __init__ does not create a logger, so it has to be set up here
        # before the first message is emitted; logging earlier would raise
        # AttributeError.
        self.logger = logging.getLogger("{} {}".format(self.__class__.__name__, self.order))
        if self.debug:
            self.logger.setLevel(logging.DEBUG)
        else:
            self.logger.setLevel(logging.INFO)

        self.logger.info("Initializing model on Spectrum {}, order {}.".format(self.spectrum_id, self.order_id))

        self.npoly = config["cheb_degree"]
        self.ChebyshevSpectrum = ChebyshevSpectrum(self.DataSpectrum, self.order_id, npoly=self.npoly)
        self.resid_deque = deque(maxlen=500) #Deque that stores the last residual spectra, for averaging
        self.counter = 0

        self.Emulator = Emulator.open(config["PCA_path"]) # Returns mu and var vectors
        self.Emulator.determine_chunk_log(self.wl) # Truncates the grid to this wl format, power of 2

        pg = self.Emulator.PCAGrid

        self.wl_FFT = pg.wl
        self.ncomp = pg.ncomp

        # Stack mean flux, flux std and the principal components row-wise so
        # update_stellar can FFT them in one call.
        self.PCOMPS = np.vstack((pg.flux_mean[np.newaxis,:], pg.flux_std[np.newaxis,:], pg.pcomps))

        self.min_v = self.Emulator.min_v
        self.ss = np.fft.rfftfreq(len(self.wl_FFT), d=self.min_v)
        self.ss[0] = 0.01 # junk so we don't get a divide by zero error

        # Buffers filled in place by update_stellar.
        self.pcomps = np.empty((self.ncomp, self.npoints))
        self.flux_mean = np.empty((self.npoints,))
        self.flux_std = np.empty((self.npoints,))
        self.mus, self.vars = None, None
        self.C_GP = None
        self.data_mat = None

        self.sigma_matrix = self.sigma**2 * np.eye(self.npoints)

        self.prior = 0.0 # Modified and set by NuisanceSampler.lnprob
        self.nregions = 0
        self.exceptions = []

        #TODO: perturb
        #if args.perturb:
            #perturb(stellar_Starting, config["stellar_jump"], factor=args.perturb)

        cheb_MH_cov = float(config["cheb_jump"])**2 * np.ones((self.npoly,))
        cheb_tuple = ("logc0",)
        # add in new coefficients
        for i in range(1, self.npoly):
            cheb_tuple += ("c{}".format(i),)
        # set starting position to 0
        cheb_Starting = {k:0.0 for k in cheb_tuple}

        # Design cov starting
        cov_Starting = config['cov_params']
        cov_tuple = C.dictkeys_to_cov_global_tuple(cov_Starting)
        cov_MH_cov = np.array([float(config["cov_jump"][key]) for key in cov_tuple])**2

        nuisance_MH_cov = np.diag(np.concatenate((cheb_MH_cov, cov_MH_cov)))
        nuisance_starting = {"cheb": cheb_Starting, "cov": cov_Starting, "regions":{}}

        # Because this initialization is happening on the subprocess, I think
        # the random state should be fine.

        # Update the outdir based upon id
        self.noutdir = outdir + "{}/{}/".format(self.spectrum_id, self.order)

        # Create the nuisance parameter sampler to run independently
        self.sampler = NuisanceSampler(OrderModel=self, starting_param_dict=nuisance_starting, cov=nuisance_MH_cov,
                                       debug=True, outdir=self.noutdir, order=self.order)
        self.p0 = self.sampler.p0

        # Update the nuisance parameters to the starting values so that we at
        # least have a self.data_mat
        self.logger.info("Updating nuisance parameter data products to starting values.")
        self.update_nuisance(nuisance_starting)
        # No current lnprob value yet; independent_sample starts from scratch.
        self.lnprob = None

    def instantiate(self, *args):
        '''
        Replace the current NuisanceSampler with one that also samples
        covariance regions, seeded from the stored residual spectra.

        The stored residuals are averaged and sigma-clipped; each outlying
        pixel, strongest first, becomes a region unless it is already covered
        by an earlier region or sits on the edge of the order. Afterwards the
        nuisance data products are updated, lnprob is re-evaluated and a
        nuisance burn-in is run.

        :param args: ignored
        :raises RuntimeError: if no residual spectra have been stored yet.
        '''

        # threshold for sigma clipping
        clip_threshold = config["sigma_clip"]

        if len(self.resid_deque) == 0:
            raise RuntimeError("No residual spectra stored yet.")

        # average all of the spectra in the deque together
        residuals = np.average(np.array(self.resid_deque), axis=0)

        # run the sigma_clip algorithm until converged, and we've identified the outliers
        outliers = sigma_clip(residuals, sig=clip_threshold, iters=None).mask
        wl = self.wl

        sigma0 = config['region_priors']['sigma0']
        logAmp = config["region_params"]["logAmp"]
        region_sigma = config["region_params"]["sigma"]

        # Pixels already claimed by a region; none to start with.
        covered = np.zeros((self.npoints,), dtype='bool')

        self.nregions = 0
        regions = {}
        centers = []  # region centers in creation order, for the mu prior

        # Visit the outliers in decreasing strength of residual
        ranked = sorted(zip(wl[outliers], np.abs(residuals[outliers])), key=itemgetter(1), reverse=True)
        for center, _strength in ranked:
            # Skip pixels that an earlier (stronger) region already covers.
            if center in wl[covered]:
                continue
            # Skip regions that would sit *right* at the edge of the echelle order.
            if center <= np.min(wl) or center >= np.max(wl):
                continue

            # Instantiate the region with the default amp and sigma values.
            index = self.nregions
            regions[index] = {"logAmp":logAmp, "sigma":region_sigma, "mu":center}
            centers.append(center)
            self.nregions = index + 1

            # Mark the stretch of wl covered by this new region.
            covered |= (wl >= (center - sigma0)) & (wl <= (center + sigma0))

        # Take the current nuisance positions as a starting point, and add the regions
        starting_dict = self.sampler.params.copy()
        starting_dict["regions"] = regions

        region_mus = np.array(centers)

        # Setup the priors (note: this updates the dict held in config)
        region_priors = config["region_priors"]
        region_priors.update({"mus":region_mus})
        prior_params = {"regions":region_priors}

        # Rebuild the jump covariance, now including the region parameters.
        cheb_MH_cov = float(config["cheb_jump"])**2 * np.ones((self.npoly,))
        cov_MH_cov = np.array([float(config["cov_jump"][key]) for key in self.sampler.cov_tup])**2
        per_region_cov = [float(config["region_jump"][key])**2 for key in C.cov_region_parameters]
        regions_MH_cov = np.array([per_region_cov] * self.nregions).flatten()

        nuisance_MH_cov = np.diag(np.concatenate((cheb_MH_cov, cov_MH_cov, regions_MH_cov)))

        print(starting_dict)
        print("cov shape {}".format(nuisance_MH_cov.shape))

        # Initialize a new sampler, replacing the old one
        self.sampler = NuisanceSampler(
            OrderModel=self,
            starting_param_dict=starting_dict,
            cov=nuisance_MH_cov,
            debug=True,
            outdir=self.noutdir,
            prior_params=prior_params,
            order=self.order,
        )

        self.p0 = self.sampler.p0

        # Update the nuisance parameters to the starting values so that we at least have a self.data_mat
        print("Updating nuisance parameter data products to starting values.")
        self.update_nuisance(starting_dict)
        self.lnprob = self.evaluate()

        # To speed up convergence, try just doing a bunch of nuisance runs before
        # going into the iteration pattern
        print("Doing nuisance burn-in for {} samples".format(config["nuisance_burn"]))
        self.independent_sample(config["nuisance_burn"])

    def get_lnprob(self, *args):
        '''
        Report the *current* lnprob of this order.

        Intended to be called from the master process (StellarSampler.sample)
        when it queries the child processes for their current value.

        :param args: ignored
        :returns: ``self.lnprob``
        '''
        current = self.lnprob
        return current

    def stellar_lnprob(self, params):
        '''
        Move the model to the proposed stellar parameters and evaluate lnprob.

        Intended to be called from the master process via the command "LNPROB".

        :param params: the stellar parameters handed on to ``update_stellar``
        :returns: the new lnprob, or ``-np.inf`` if a ``C.ModelError`` was
            raised while updating or evaluating.
        '''

        try:
            self.update_stellar(params)
            # evaluate() also stores the new value in self.lnprob
            return self.evaluate()
        except C.ModelError:
            self.logger.debug("ModelError in stellar parameters, sending back -np.inf {}".format(params))
            return -np.inf

    def evaluate(self):
        '''
        Compute lnprob from the current data covariance matrix and the other
        intermediate products, store it in ``self.lnprob`` and return it.

        The previous value is kept in ``self.lnprob_last``. Every 100th call
        the residual spectrum is appended to ``self.resid_deque``.

        :returns: the new lnprob
        :raises C.ModelError: if the covariance matrix cannot be Cholesky factored.
        '''
        self.lnprob_last = self.lnprob

        cheb = self.ChebyshevSpectrum.k

        # Principal components scaled by the Chebyshev factor and flux_std.
        X = (cheb * self.flux_std * np.eye(self.npoints)).dot(self.pcomps.T)

        # Emulator covariance propagated through X, plus the data covariance.
        full_cov = X.dot(self.C_GP.dot(X.T)) + self.data_mat

        # Residual of the data about the mean model.
        resid = self.fl - cheb * self.flux_mean - X.dot(self.mus)

        try:
            factor, flag = cho_factor(full_cov)
        except np.linalg.LinAlgError as e:
            self.logger.debug("self.sampler.params are {}".format(self.sampler.params))
            raise C.ModelError("Can't Cholesky factor {}".format(e))

        # log-determinant from the diagonal of the Cholesky factor
        logdet = np.sum(2 * np.log((np.diag(factor))))
        chi2 = np.dot(resid, cho_solve((factor, flag), resid))

        self.lnprob = -0.5 * (chi2 + logdet) + self.prior

        # Store only every 100th residual spectrum.
        if self.counter % 100 == 0:
            self.resid_deque.append(resid)
        self.counter += 1

        return self.lnprob

    def revert_stellar(self):
        '''
        Roll the model back after a rejected stellar proposal.

        Restores ``lnprob`` and each data product (``flux_mean``, ``flux_std``,
        ``pcomps``, ``mus``, ``vars`` and ``C_GP``) from its ``*_last``
        counterpart.
        '''

        self.logger.debug("Reverting stellar parameters")

        self.lnprob = self.lnprob_last

        # Each product is restored from the attribute carrying the "_last" suffix.
        for product in ("flux_mean", "flux_std", "pcomps", "mus", "vars", "C_GP"):
            setattr(self, product, getattr(self, product + "_last"))

    def update_stellar(self, params):
        '''
        Update the model to the current stellar parameters.

        The stacked PCA products are broadened by vsini in Fourier space,
        Doppler shifted by vz, resampled onto the data wavelengths and scaled
        by Omega; the emulator is then queried for the new ``mus`` and
        ``vars``. The previously accepted products are saved in the ``*_last``
        attributes so that ``revert_stellar`` can restore them.

        :param params: proposed stellar parameters; the keys read here are
            "vz", "vsini", "logOmega", "temp", "logg" and "Z".
        :raises C.ModelError: if vsini is below 0.2 (per the original comment,
            the Emulator also raises it for parameters outside the grid).
        :raises RuntimeError: if the data wavelengths extend beyond the
            shifted ``wl_FFT`` grid.
        '''

        self.logger.debug("Updating stellar parameters to {}".format(params))

        # Store the current accepted values before overwriting with new proposed values.
        # The three flux arrays are written to *in place* below (``lres[:] = ...``
        # and ``*=``), so they must be copied; keeping only a reference would
        # overwrite the saved state as well and make revert_stellar a no-op.
        self.flux_mean_last = self.flux_mean.copy()
        self.flux_std_last = self.flux_std.copy()
        self.pcomps_last = self.pcomps.copy()
        # These are rebound (not mutated) below, so references are enough.
        self.mus_last, self.vars_last = self.mus, self.vars
        self.C_GP_last = self.C_GP

        #TODO: Possible speedups:
        # 1. Store the PCOMPS pre-FFT'd

        # Shift the velocity
        vz = params["vz"]
        # Local, shifted copy
        wl_FFT = self.wl_FFT * np.sqrt((C.c_kms + vz) / (C.c_kms - vz))

        # FFT and convolve operations
        vsini = params["vsini"]

        if vsini < 0.2:
            # The message now states the limit that is actually enforced.
            raise C.ModelError("vsini must be at least 0.2 km/s")

        FF = np.fft.rfft(self.PCOMPS, axis=1)

        # Determine the stellar broadening kernel
        ub = 2. * np.pi * vsini * self.ss
        sb = j1(ub) / ub - 3 * np.cos(ub) / (2 * ub ** 2) + 3. * np.sin(ub) / (2 * ub ** 3)
        # set zeroth frequency to 1 separately (DC term)
        sb[0] = 1.

        # institute velocity and instrumental taper
        FF_tap = FF * sb

        # do ifft
        pcomps_full = np.fft.irfft(FF_tap, len(wl_FFT), axis=1)

        # Spectrum resample operations
        if min(self.wl) < min(wl_FFT) or max(self.wl) > max(wl_FFT):
            raise RuntimeError("Data wl grid ({:.2f},{:.2f}) must fit within the range of wl_FFT ({"
                       ":.2f},{:.2f})".format(min(self.wl), max(self.wl), min(wl_FFT), max(wl_FFT)))

        # Take the output from the FFT operation (pcomps_full), and stuff them
        # into respective data products
        for lres, hres in zip(chain([self.flux_mean, self.flux_std], self.pcomps), pcomps_full):
            interp = InterpolatedUnivariateSpline(wl_FFT, hres, k=5)
            lres[:] = interp(self.wl)
            del interp

        gc.collect()

        # Adjust flux_mean and flux_std by Omega
        Omega = 10**params["logOmega"]
        self.flux_mean *= Omega
        self.flux_std *= Omega

        # Now update the parameters from the emulator
        pars = np.array([params["temp"], params["logg"], params["Z"]])

        # If pars are outside the grid, Emulator will raise C.ModelError
        self.mus, self.vars = self.Emulator(pars)

        self.C_GP = self.vars * np.eye(self.ncomp)

    def decide_stellar(self, yes):
        '''
        Apply the master process's verdict on the last stellar proposal, then
        run one iteration of independent sampling.

        :param yes: if true the stellar parameters are accepted and kept;
            otherwise the model is reverted via ``revert_stellar``.
        '''
        if not yes:
            # Rejected: restore the previously accepted state.
            self.logger.debug("Deciding to revert stellar parameters")
            self.revert_stellar()
        else:
            # Accepted: nothing to undo.
            self.logger.debug("Deciding to accept stellar parameters")

        # Either way, continue with one step of independent sampling.
        self.independent_sample(1)

    def update_nuisance(self, params):
        '''
        Apply new nuisance parameters: update the Chebyshev spectrum and
        rebuild the data covariance matrix.

        The previous matrix is kept in ``self.data_mat_last`` so the update
        can be reverted.

        :param params: large dictionary containing cheb, cov, and regions
        :raises C.ModelError: if the global ``sigAmp`` is below 0.1.
        '''

        self.logger.debug("Updating nuisance parameters to {}".format(params))

        # Read off the Chebyshev parameters and update
        self.ChebyshevSpectrum.update(params["cheb"])

        # Global covariance parameters
        global_cov = params["cov"]
        length = global_cov["l"]
        sigAmp = global_cov["sigAmp"]

        # Check to make sure the global covariance parameters make sense
        if sigAmp < 0.1:
            raise C.ModelError("sigAmp shouldn't be lower than 0.1, something is wrong.")

        # Cutoff distance from the global length scale [km/s]
        max_r = 6.0 * length

        # If there are regions, the widest one may push the cutoff further out.
        if self.nregions > 0:
            regions = params["regions"]
            sigmas = np.array([regions[key]["sigma"] for key in sorted(regions)]) #km/s
            max_r = max(max_r, 4.0 * np.max(sigmas))

        # Create a partial function which returns the proper element.
        k_func = make_k_func(params)

        # Store the previous data matrix in case we want to revert later
        self.data_mat_last = self.data_mat
        dense_cov = get_dense_C(self.wl, k_func=k_func, max_r=max_r)
        self.data_mat = dense_cov + sigAmp*self.sigma_matrix

    def revert_nuisance(self, *args):
        '''
        Undo the last nuisance parameter update: restore ``lnprob``, revert
        the Chebyshev spectrum and restore the data covariance matrix.

        :param args: ignored
        '''

        self.logger.debug("Reverting nuisance parameters")

        previous_lnprob = self.lnprob_last
        self.lnprob = previous_lnprob

        # The Chebyshev object performs its own revert.
        self.ChebyshevSpectrum.revert()

        previous_data_mat = self.data_mat_last
        self.data_mat = previous_data_mat

    def clear_resid_deque(self):
        '''
        Empty ``self.resid_deque``, discarding the residual spectra stored so far.
        '''
        stored_residuals = self.resid_deque
        stored_residuals.clear()

    def independent_sample(self, niter):
        '''
        Do the independent sampling specific to this echelle order, using the
        attached self.sampler (NuisanceSampler).

        The sampler's returned position and lnprob replace ``self.p0`` and
        ``self.lnprob``. Nothing is returned to the master process.

        :param niter: number of iterations to complete before returning to master process.
        '''

        self.logger.debug("Beginning independent sampling on nuisance parameters")

        run_kwargs = {"pos0": self.p0, "N": niter}
        # ``None`` is the "no current value" sentinel set by initialize().
        # Compare against it explicitly: a truthiness test would also treat a
        # valid lnprob of exactly 0.0 as missing.
        if self.lnprob is not None:
            run_kwargs["lnprob0"] = self.lnprob

        self.p0, self.lnprob, _ = self.sampler.run_mcmc(**run_kwargs)

        self.logger.debug("Finished independent sampling on nuisance parameters")
        # Don't return anything to the master process.

    def finish(self, *args):
        '''
        Wrap up the sampling: print the sampler diagnostics, have the sampler
        write and plot its samples, and print every recorded exception.

        :param args: ignored
        '''
        sampler = self.sampler

        print(sampler.acceptance_fraction)
        print(sampler.acor)
        sampler.write()
        sampler.plot() # triangle_plot=True

        print("There were {} exceptions.".format(len(self.exceptions)))
        # For each exception, print every region (in key order) and then the
        # global covariance parameters.
        for failed_params in self.exceptions:
            regions = failed_params["regions"]
            for key in sorted(regions):
                print(regions[key])
            print(failed_params["cov"])
            print("\n\n")

    def brain(self, conn):
        '''
        Main loop of the subprocess: keep handling messages from the pipe
        until ``self.interpret()`` returns a false value, then send "DEAD".

        :param conn: the connection to listen on; stored as ``self.conn``.
        '''
        self.conn = conn

        # Each call to interpret() handles one message; a false return value
        # ends the loop.
        while self.interpret():
            pass

        self.conn.send("DEAD")

    def interpret(self):
        '''
        Receive one message from the Pipe and dispatch it.

        Messages are always sent in a 2-arg tuple (fname, arg). ``fname`` is
        looked up in ``self.func_dict`` and the matching function is called
        with ``arg``. Right now we only expect one function and one argument
        but this could be generalized to **args.

        :returns: True to keep listening, False when ``fname`` is unknown
            (treated as a kill signal).
        '''

        fname, arg = self.conn.recv() # Waits here to receive a new message
        self.logger.debug("{} received message {}".format(os.getpid(), (fname, arg)))

        func = self.func_dict.get(fname)
        if func is None:
            self.logger.info("Given an unknown function {}, assuming kill signal.".format(fname))
            return False

        response = func(arg)

        # Functions only return a response other than None when they want it
        # communicated back to the master process. Test against None
        # explicitly: a truthiness test would silently drop a legitimate
        # response such as an lnprob of 0.0 (leaving the master waiting on the
        # pipe) and would raise on a multi-element array.
        if response is not None:
            self.logger.debug("{} sending back {}".format(os.getpid(), response))
            self.conn.send(response)
        return True