def X(self, i, j=slice(None, None, None)):
    """Build the design matrix block for order index ``i`` at rows ``j``.

    Parameters
    ----------
    i : int
        Zero-based order index; ``i + 1`` is handed to ``multichoose`` and
        is also the exponent applied to ``self.X1N``.
    j : slice or index array, optional
        Selection applied along the first axis of ``self.fpix``,
        ``self.norm`` and ``self.X1N``. Defaults to every row.

    Returns
    -------
    numpy.ndarray
        Products of the normalized pixel columns, with
        ``self.X1N[j] ** (i + 1)`` appended as extra columns when
        ``self.X1N`` is not ``None``.
    """
    # Normalize every row of pixel fluxes by that row's norm.
    X1 = self.fpix[j] / self.norm[j].reshape(-1, 1)

    # NOTE(review): multichoose presumably yields every multiset of
    # ``i + 1`` pixel columns -- confirm against the helper's definition.
    # np.prod is used because the np.product alias was removed in NumPy 2.0.
    X = np.prod(list(multichoose(X1.T, i + 1)), axis=1).T

    if self.X1N is not None:
        return np.hstack([X, self.X1N[j] ** (i + 1)])
    return X
def perform_PLD(self, fpix, motion, mask):
    '''
    Perform PLD (first and second order terms) on a light curve.

    Parameters
    ----------
    fpix : numpy.ndarray
        Pixel fluxes with cadences along the first axis. NaN entries are
        set to 0 **in place**, so the caller's array is modified.
    motion : int
        Integer substituted into ``'masks/larger_aperture/X10_%i.npz'`` to
        locate the extra regressors stored under the key ``'X'``.
    mask : int or sequence of int
        Row indices removed (``np.delete`` along axis 0) from ``fpix`` and
        from the loaded regressors.

    Returns
    -------
    detrended : numpy.ndarray
        ``flux - model + nanmean(flux)``.
    flux : numpy.ndarray
        The raw light curve, i.e. the per-cadence sum over all pixels.
    '''
    def outM(x):
        # Drop the masked rows.
        return np.delete(x, mask, axis=0)

    # hack: zero out NaN pixels (kept in place to preserve the original
    # side effect on the caller's array).
    naninds = np.where(np.isnan(fpix))
    fpix[naninds] = 0

    # generate flux light curve
    fpix = outM(fpix)
    fpix_rs = fpix.reshape(len(fpix), -1)
    flux = np.sum(fpix_rs, axis=1)

    # First order PLD
    f1 = fpix_rs / flux.reshape(-1, 1)
    X1 = PCA(n_components=20).fit_transform(f1)

    # Second order PLD
    # np.prod is used because the np.product alias was removed in NumPy 2.0.
    f2 = np.prod(list(multichoose(f1.T, 2)), axis=1).T
    X2 = PCA(n_components=10).fit_transform(f2)

    # Load the extra regressors; the context manager closes the archive,
    # which a bare ``np.load(...)['X']`` would leave open.
    with np.load('masks/larger_aperture/X10_%i.npz' % motion) as archive:
        X10 = archive['X']
    # Drop the first column of every row, then remove the masked rows.
    X10crop = np.array(outM([row[1:] for row in X10]))

    # Combine them and add a column vector of 1s for stability
    X3 = np.hstack([np.ones(X1.shape[0]).reshape(-1, 1), X1, X2])
    X = np.concatenate((X3, X10crop), axis=1)

    # Least-squares solve on the rows selected by self.M.
    # NOTE(review): self.M is defined elsewhere; presumably a cadence mask.
    MX = self.M(X)
    A = np.dot(MX.T, MX)
    B = np.dot(MX.T, self.M(flux))
    C = np.linalg.solve(A, B)

    # compute detrended light curve
    model = np.dot(X, C)
    detrended = flux - model + np.nanmean(flux)

    return detrended, flux
def X(self, i, j=slice(None, None, None)):
    '''
    Computes the design matrix at the given *PLD* order and the given
    indices. The columns are the *PLD* vectors for the target at the
    corresponding order, computed as the product of the fractional pixel
    flux of all sets of :py:obj:`n` pixels, where :py:obj:`n` is the *PLD*
    order.

    :param int i: Zero-based order index; ``i + 1`` is passed to \
           ``multichoose`` and used as the exponent on ``self.X1N``
    :param j: Selection applied along the first axis of ``self.fpix``, \
           ``self.norm`` and ``self.X1N``. Default is every row

    :returns: The design matrix, with ``self.X1N[j] ** (i + 1)`` appended \
              as extra columns when ``self.X1N`` is not :py:obj:`None`
    '''
    # Fractional pixel flux: each row divided by that row's norm.
    X1 = self.fpix[j] / self.norm[j].reshape(-1, 1)

    # np.prod is used because the np.product alias was removed in NumPy 2.0.
    X = np.prod(list(multichoose(X1.T, i + 1)), axis=1).T

    if self.X1N is not None:
        return np.hstack([X, self.X1N[j] ** (i + 1)])
    return X
def X(self, i, j=slice(None, None, None)):
    '''
    Computes the design matrix at the given *PLD* order and the given
    indices. The columns are the *PLD* vectors for the target at the
    corresponding order, computed as the product of the fractional pixel
    flux of all sets of :py:obj:`n` pixels, where :py:obj:`n` is the *PLD*
    order.

    :param int i: Zero-based order index; ``i + 1`` is passed to \
           ``multichoose`` and used as the exponent on ``self.X1N``
    :param j: Selection applied along the first axis of ``self.fpix``, \
           ``self.norm`` and ``self.X1N``. Default is every row

    :returns: The design matrix, with ``self.X1N[j] ** (i + 1)`` appended \
              as extra columns when ``self.X1N`` is not :py:obj:`None`
    '''
    order = i + 1

    # Fractional pixel flux: each row divided by that row's norm.
    X1 = self.fpix[j] / self.norm[j].reshape(-1, 1)

    # np.prod is used because the np.product alias was removed in NumPy 2.0.
    X = np.prod(list(multichoose(X1.T, order)), axis=1).T

    if self.X1N is None:
        return X
    return np.hstack([X, self.X1N[j] ** order])
def create_design_matrix(
    self,
    pld_order=3,
    pca_components=16,
    pld_aperture_mask=None,
    background_aperture_mask="background",
    spline_n_knots=None,
    spline_degree=3,
    normalize_background_pixels=None,
    sparse=False,
):
    """Returns a `.DesignMatrixCollection` containing a `DesignMatrix` object
    for the background regressors, the PLD pixel component regressors, and
    the spline regressors.

    Side effects: stores the parsed masks on ``self.pld_aperture_mask`` and
    ``self.background_aperture_mask``.

    Parameters
    ----------
    pld_order : int
        The order of Pixel Level De-correlation to be performed. First order
        (`n=1`) uses only the pixel fluxes to construct the design matrix.
        Higher order populates the design matrix with columns constructed
        from the products of pixel fluxes.
    pca_components : int or tuple of int
        Number of terms added to the design matrix for each order of PLD
        pixel fluxes. Increasing this value may provide higher precision
        at the expense of slower speed and/or overfitting.
        If performing PLD with `pld_order > 1`, `pca_components` can be
        a tuple containing the number of terms for each order of PLD; the
        first entry is then also used for the background matrix and for the
        initial reduction of the pixel regressors.
        If a single int is passed, the same number of terms will be used
        for each order. If zero is passed, PCA will not be performed.
    pld_aperture_mask : array-like, 'pipeline', 'all', 'threshold', or None
        A boolean array describing the aperture such that `True` means
        that the pixel will be used when selecting the PLD basis vectors.
        If `None` or `all` are passed in, all pixels will be used.
        If 'pipeline' is passed, the mask suggested by the official pipeline
        will be returned.
        If 'threshold' is passed, all pixels brighter than 3-sigma above
        the median flux will be used.
    background_aperture_mask : array-like or str
        Mask selecting the pixels used as background regressors. It is
        passed to ``self.tpf._parse_aperture_mask``.
    spline_n_knots : int
        Number of knots in spline. If `None`, ``int(len(self.lc) / 50)``
        is used.
    spline_degree : int
        Polynomial degree of spline.
    normalize_background_pixels : bool or None
        If truthy, each cadence of background pixels is divided by the
        summed background flux of that cadence; otherwise the raw values
        are used.
    sparse : bool
        Whether to create `SparseDesignMatrix`.

    Returns
    -------
    dm : `.DesignMatrixCollection`
        `.DesignMatrixCollection` containing pixel, background, and spline
        components. The pixel component is omitted when
        `pld_aperture_mask` selects no pixels.
    """

    def _n_components(order):
        # `pca_components` may be one int shared by all orders or a
        # sequence with one entry per PLD order.
        if isinstance(pca_components, (tuple, list)):
            return pca_components[order - 1]
        return pca_components

    # Validate the inputs
    pld_aperture_mask = self.tpf._parse_aperture_mask(pld_aperture_mask)
    self.pld_aperture_mask = pld_aperture_mask
    background_aperture_mask = self.tpf._parse_aperture_mask(
        background_aperture_mask)
    self.background_aperture_mask = background_aperture_mask

    if spline_n_knots is None:
        # Default to a spline per 50 data points
        spline_n_knots = int(len(self.lc) / 50)

    if sparse:
        DMC = SparseDesignMatrixCollection
        spline = create_sparse_spline_matrix
    else:
        DMC = DesignMatrixCollection
        spline = create_spline_matrix

    # We set the width of all coefficient priors to 10 times the standard
    # deviation to prevent the fit from going crazy.
    prior_sigma = np.nanstd(self.lc.flux.value) * 10

    first_ncomp = _n_components(1)

    # Flux normalize background components for K2 and not for TESS by default
    bkg_pixels = self.tpf.flux[:, background_aperture_mask].reshape(
        len(self.tpf.flux), -1)
    if normalize_background_pixels:
        bkg_flux = np.nansum(self.tpf.flux[:, background_aperture_mask], -1)
        bkg_pixels = np.array(
            [r / f for r, f in zip(bkg_pixels, bkg_flux)])
    else:
        bkg_pixels = bkg_pixels.value

    # Remove NaNs
    bkg_pixels = np.array([r[np.isfinite(r)] for r in bkg_pixels])

    # Create background design matrix
    dm_bkg = DesignMatrix(bkg_pixels, name="background")
    # Apply PCA (skipped for zero components, as the docstring promises)
    if first_ncomp > 0:
        dm_bkg = dm_bkg.pca(first_ncomp)
    # Set prior sigma to 10 * standard deviation
    dm_bkg.prior_sigma = np.ones(dm_bkg.shape[1]) * prior_sigma

    # Create a design matrix containing splines plus a constant
    dm_spline = spline(self.lc.time.value,
                       n_knots=spline_n_knots,
                       degree=spline_degree).append_constant()
    # Set prior sigma to 10 * standard deviation
    dm_spline.prior_sigma = np.ones(dm_spline.shape[1]) * prior_sigma

    # Without any PLD pixels only the background and spline are returned.
    if np.sum(pld_aperture_mask) == 0:
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message=".*Not all matrices are `SparseDesignMatrix` objects..*",
            )
            return DMC([dm_bkg, dm_spline])

    # Flux normalize the PLD components
    pld_pixels = self.tpf.flux[:, pld_aperture_mask].reshape(
        len(self.tpf.flux), -1)
    pld_pixels = np.array(
        [r / f for r, f in zip(pld_pixels, self.lc.flux.value)])
    # Remove NaNs
    pld_pixels = np.array([r[np.isfinite(r)] for r in pld_pixels])

    # Use the DesignMatrix infrastructure to apply PCA to the regressors.
    regressors_dm = DesignMatrix(pld_pixels)
    if first_ncomp > 0:
        regressors_dm = regressors_dm.pca(first_ncomp)
    regressors_pld = regressors_dm.values

    # Create a DesignMatrix for each PLD order
    all_pld = []
    for order in range(1, pld_order + 1):
        # np.prod is used because the np.product alias was removed in
        # NumPy 2.0.
        reg_n = np.prod(list(multichoose(regressors_pld.T, order)),
                        axis=1).T
        pld_n = DesignMatrix(
            reg_n,
            prior_sigma=np.ones(reg_n.shape[1]) * prior_sigma /
            reg_n.shape[1],
            name=f"pld_order_{order}",
        )
        # Apply PCA.
        ncomp = _n_components(order)
        if ncomp > 0:
            pld_n = pld_n.pca(ncomp)
            # Calling pca() resets the priors, so we set them again.
            pld_n.prior_sigma = (np.ones(pld_n.shape[1]) * prior_sigma /
                                 ncomp)
        all_pld.append(pld_n)

    # Create the collection of DesignMatrix objects.
    # DesignMatrix 1 contains the PLD pixel series
    dm_pixels = DesignMatrixCollection(all_pld).to_designmatrix(
        name="pixel_series")

    with warnings.catch_warnings():
        warnings.filterwarnings(
            "ignore",
            message=".*Not all matrices are `SparseDesignMatrix` objects..*",
        )
        dm_collection = DMC([dm_pixels, dm_bkg, dm_spline])
    return dm_collection
def PLD2(time, flux, ferr, lc, ap, n=None, mask=None, gp_timescale=30):
    """Detrend a light curve with first plus second order PLD and a GP.

    Parameters
    ----------
    time : numpy.ndarray
        Cadence times.
    flux, ferr : numpy.ndarray
        Pixel fluxes and their errors, indexed as ``[cadence, x, y]``.
    lc : object
        Unused; kept so existing callers keep working.
    ap : numpy.ndarray
        2-D aperture mask over the pixel grid (assumed boolean, since it is
        used for boolean indexing -- TODO confirm).
    n : int, optional
        Number of PCA components kept per PLD order. Defaults to
        ``min(20, ap.sum())``.
    mask : index, optional
        Index applied along the cadence axis to choose the cadences used in
        the fit. Defaults to ``np.where(time)``.
    gp_timescale : float
        Its logarithm is the second argument to
        ``celerite.terms.Matern32Term``.

    Returns
    -------
    det_flux : numpy.ndarray
        ``rawflux - (model - nanmean(model))`` for every cadence.
    flux_err : numpy.ndarray
        Quadrature sum of the in-aperture pixel errors per cadence.
    """
    if n is None:
        n = min(20, ap.sum())

    # Bounding box of the aperture; the cubes are cropped to it.
    ap_rows, ap_cols = np.where(ap)[:2]
    xmin, xmax = min(ap_rows), max(ap_rows)
    ymin, ymax = min(ap_cols), max(ap_cols)
    flux_crop = flux[:, xmin:xmax + 1, ymin:ymax + 1]
    ferr_crop = ferr[:, xmin:xmax + 1, ymin:ymax + 1]
    ap_crop = ap[xmin:xmax + 1, ymin:ymax + 1]

    flux_err = np.nansum(ferr_crop[:, ap_crop]**2, axis=1)**0.5

    if mask is None:
        mask = np.where(time)

    def M(x):
        # Keep only the cadences used in the fit.
        return x[mask]

    # Zero the pixels outside the aperture and flatten to (cadence, pixel).
    apval = np.copy(ap_crop).astype(int)
    ap_flux = np.array([f * apval
                        for f in flux_crop]).reshape(len(flux_crop), -1)
    rawflux = np.sum(ap_flux, axis=1)

    # First order: fractional pixel fluxes reduced with PCA.
    f1 = ap_flux / rawflux.reshape(-1, 1)
    X1 = PCA(n_components=n).fit_transform(f1)

    # Second order. np.prod is used because the np.product alias was
    # removed in NumPy 2.0.
    f2 = np.prod(list(multichoose(f1.T, 2)), axis=1).T
    X2 = PCA(n_components=n).fit_transform(f2)

    # Design matrix: a column of ones plus both sets of components.
    X = np.hstack([np.ones(X1.shape[0]).reshape(-1, 1), X1, X2])
    MX = M(X)
    masked_flux = M(rawflux)

    # Residuals of an unweighted fit set the GP amplitude.
    y = masked_flux - np.dot(
        MX, np.linalg.solve(np.dot(MX.T, MX), np.dot(MX.T, masked_flux)))
    amp = np.nanstd(y)
    tau = gp_timescale
    ker = celerite.terms.Matern32Term(np.log(amp), np.log(tau))
    gp = celerite.GP(ker)

    # Covariance: GP kernel plus the squared per-cadence sum of pixel errors.
    masked_ferr = M(ferr_crop)
    sigma = gp.get_matrix(M(time)) + np.diag(
        np.sum(masked_ferr.reshape(len(masked_ferr), -1), axis=1)**2)

    # Generalized least squares for the regressor weights.
    A = np.dot(MX.T, np.linalg.solve(sigma, MX))
    B = np.dot(MX.T, np.linalg.solve(sigma, masked_flux))
    C = np.linalg.solve(A, B)

    model = np.dot(X, C)
    det_flux = rawflux - (model - np.nanmean(model))
    return det_flux, flux_err
def correct(self,
            aperture_mask=None,
            cadence_mask=None,
            gp_timescale=30,
            use_gp=True,
            pld_order=2,
            n_pca_terms=10,
            pld_aperture_mask=None):
    r"""Returns a PLD systematics-corrected LightCurve.

    Side effects: ``self.flux``, ``self.flux_err`` and ``self.time`` are
    replaced by their finite-valued cadences, and the result is stored on
    ``self.detrended_flux``.

    Parameters
    ----------
    aperture_mask : array-like, 'pipeline', 'all', 'threshold', or None
        A boolean array describing the aperture such that `True` means
        that the pixel will be used to generate the raw flux light curve.
        If `None` or 'all' are passed, all pixels will be used.
        If 'pipeline' is passed, the mask suggested by the official pipeline
        will be returned.
        If 'threshold' is passed, all pixels brighter than 3-sigma above
        the median flux will be used.
    cadence_mask : array-like
        A mask that will be applied to the cadences prior to constructing
        the detrending model. For example, you can pass a boolean array of
        length `n_cadences` where `True` means that the cadence will be
        included in the noise model. You may also pass an array of indices.
        This option enables signals of interest (e.g. planet transits)
        to be excluded from the noise model, which will prevent over-fitting.
        By default, no cadences will be masked.
    gp_timescale : float
        Gaussian Process time scale length term (`tau`) used to define
        length of fit variability in days.
    use_gp : boolean
        Option to turn GP fitting on or off. You would typically only set
        this to False to speed up the correction (at the cost of precision),
        or if you suspect the presence of systematic noise at long
        timescales. It is forced to False when `celerite` cannot be
        imported.
    pld_order : int
        The order of Pixel Level De-correlation to be performed. First order
        (`n=1`) uses only the pixel fluxes to construct the design matrix.
        Higher order populates the design matrix with columns constructed
        from the products of pixel fluxes.
    n_pca_terms : int
        Number of terms added to the design matrix from each order of PLD
        when performing Principal Component Analysis for models higher than
        first order. Increasing this value may provide higher precision at
        the expense of computational time.
    pld_aperture_mask : array-like, 'pipeline', 'all', 'threshold', or None
        A boolean array describing the aperture such that `True` means
        that the pixel will be used when selecting the PLD basis vectors.
        If `None` or `all` are passed in, all pixels will be used.
        If 'pipeline' is passed, the mask suggested by the official pipeline
        will be returned.
        If 'threshold' is passed, all pixels brighter than 3-sigma above
        the median flux will be used.

    Returns
    -------
    corrected_lightcurve : `~lightkurve.lightcurve.LightCurve`
        Returns a corrected lightcurve object. Depending on the input,
        the returned object will be a `KeplerLightCurve`, `TessLightCurve`,
        or general `LightCurve` object.
    """
    if use_gp:
        # Verify optional dependency
        try:
            import celerite
        except ImportError:
            log.error("PLD uses the `celerite` Python package. "
                      "See the installation instructions at "
                      "https://docs.lightkurve.org/about/install.html. "
                      "`use_gp` has been set to `False`.")
            use_gp = False

    # Parse the aperture mask to accept strings etc.
    aperture = self.tpf._parse_aperture_mask(aperture_mask)
    # generate flux light curve from desired pixels
    lc = self.tpf.to_lightcurve(aperture_mask=aperture)
    rawflux = lc.flux.value
    rawflux_err = lc.flux_err.value

    # create nan mask
    nanmask = np.isfinite(self.time)
    nanmask &= np.isfinite(rawflux)
    nanmask &= np.isfinite(rawflux_err)
    nanmask &= np.abs(rawflux_err) > 1e-12

    # mask out nan values
    rawflux = rawflux[nanmask]
    rawflux_err = rawflux_err[nanmask]
    self.flux = self.flux[nanmask]
    self.flux_err = self.flux_err[nanmask]
    self.time = self.time[nanmask]

    # parse the PLD aperture mask
    pld_pixel_mask = self.tpf._parse_aperture_mask(pld_aperture_mask)

    # find pixel bounds of aperture on tpf
    mask_rows, mask_cols = np.where(pld_pixel_mask)[:2]
    xmin, xmax = min(mask_rows), max(mask_rows)
    ymin, ymax = min(mask_cols), max(mask_cols)

    # crop data cube to include only desired pixels
    # this is required for superstamps to ensure matrix is invertable
    flux_crop = self.flux[:, xmin:xmax + 1, ymin:ymax + 1]
    flux_err_crop = self.flux_err[:, xmin:xmax + 1, ymin:ymax + 1]
    aperture_crop = pld_pixel_mask[xmin:xmax + 1, ymin:ymax + 1]

    # calculate errors (ignore warnings related to zero or negative errors)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        flux_err = np.nansum(flux_err_crop[:, aperture_crop]**2,
                             axis=1)**0.5

    # first order PLD design matrix
    pld_flux = flux_crop[:, aperture_crop]
    f1 = np.reshape(pld_flux, (len(pld_flux), -1))
    X1 = f1 / np.nansum(pld_flux, axis=-1)[:, None]
    # No NaN pixels
    X1 = X1[:, np.isfinite(X1).all(axis=0)]

    # higher order PLD design matrices
    X_sections = [np.ones((len(flux_crop), 1)), X1]
    for i in range(2, pld_order + 1):
        # Build the products for *this* order. The loop variable must be
        # used here: passing `pld_order` would add the highest order
        # repeatedly and skip every intermediate order.
        # np.prod is used because the np.product alias was removed in
        # NumPy 2.0.
        f2 = np.prod(list(multichoose(X1.T, i)), axis=1).T
        try:
            # We use an optional dependency for very fast PCA (fbpca).
            # If the import fails we will fall back on using the slower
            # `np.linalg.svd`
            from fbpca import pca
            components, _, _ = pca(f2, n_pca_terms)
        except ImportError:
            log.error("PLD uses the `fbpca` package. You can pip install "
                      "with `pip install fbpca`. Using `np.linalg.svd` "
                      "instead.")
            components, _, _ = np.linalg.svd(f2)
        X_n = components[:, :n_pca_terms]
        X_sections.append(X_n)

    # Create the design matrix X by stacking X1 and higher order components,
    # and adding a column vector of 1s for numerical stability (see Luger
    # et al.). The sections are joined along axis 1, so every section
    # contributes columns.
    X = np.concatenate(X_sections, axis=1)

    # set default transit mask
    if cadence_mask is None:
        cadence_mask = np.ones_like(lc.time, dtype=bool)

    def M(x):
        # Apply the cadence mask restricted to the finite-valued cadences.
        return x[cadence_mask[nanmask]]

    # mask transits in design matrix
    MX = M(X)

    if use_gp:
        # We use a Gaussian Process to model the long term trend.
        # We do this by estimating the long term trend y by applying the
        # preliminary PLD model defined above and subtracting it from the
        # raw light curve. The "in transit" cadences are masked out in this
        # step to prevent the long term approximation from over-fitting the
        # transits.
        XTX = np.dot(MX.T, MX)
        XTX[np.diag_indices_from(XTX)] += 1e-8
        XTy = np.dot(MX.T, M(rawflux))
        y = M(rawflux) - np.dot(MX, np.linalg.solve(XTX, XTy))

        # Estimate the amplitude parameter of a Matern-3/2 kernel GP
        # by computing the standard deviation of y.
        amp = np.nanstd(y)
        tau = gp_timescale  # tau is a user-defined parameter

        # set up gaussian process using celerite
        # we use a Matern-3/2 kernel for its flexibility and non-periodicity
        kernel = celerite.terms.Matern32Term(np.log(amp), np.log(tau))
        gp = celerite.GP(kernel)
        gp.compute(M(self.time), M(rawflux_err))

        # compute the coefficients C on the basis vectors;
        # the PLD design matrix will be dotted with C to solve for the
        # noise model.
        A = np.dot(MX.T, gp.apply_inverse(MX))
        B = np.dot(MX.T, gp.apply_inverse(M(rawflux)[:, None])[:, 0])
    else:
        # compute the coefficients C on the basis vectors;
        # the PLD design matrix will be dotted with C to solve for the
        # noise model.
        ivar = 1.0 / M(rawflux_err)**2  # inverse variance
        A = np.dot(MX.T, MX * ivar[:, None])
        B = np.dot(MX.T, M(rawflux) * ivar)

    # apply prior to design matrix weights for numerical stability
    A[np.diag_indices_from(A)] += 1e-8
    C = np.linalg.solve(A, B)

    # compute detrended light curve
    model = np.dot(X, C)
    self.detrended_flux = rawflux - (model - np.nanmean(model))

    # Create and return a new LightCurve object with the corrected flux
    corrected_lc = lc.copy()[nanmask]
    corrected_lc.flux = self.detrended_flux
    corrected_lc.flux_err = flux_err.value
    return corrected_lc
def correct(self,
            aperture_mask=None,
            cadence_mask=None,
            gp_timescale=30,
            n_components_first=None,
            n_components_second=20,
            use_gp=True):
    r"""Returns a PLD systematics-corrected LightCurve.

    Side effects: stores the per-pixel light curves on
    ``self.aperture_flux`` and the result on ``self.detrended_flux``.

    Parameters
    ----------
    aperture_mask : array-like, 'pipeline', 'all', 'threshold', or None
        A boolean array describing the aperture such that `True` means
        that the pixel will be used.
        If `None` or 'all' are passed, all pixels will be used.
        If 'pipeline' is passed, the mask suggested by the official pipeline
        will be returned.
        If 'threshold' is passed, all pixels brighter than 3-sigma above
        the median flux will be used.
    cadence_mask : array-like
        A mask that will be applied to the cadences prior to constructing
        the detrending model. For example, you can pass a boolean array of
        length `n_cadences` where `True` means that the cadence will be
        included in the noise model. You may also pass an array of indices.
        This option enables signals of interest (e.g. planet transits)
        to be excluded from the noise model, which will prevent over-fitting.
        By default, no cadences will be masked.
    gp_timescale : float
        Gaussian Process time scale length term (`tau`) used to define
        length of fit variability in days.
    n_components_first : int
        Number of first-order PLD components to reduce to with PCA. Must be
        smaller than the number of pixels in the aperture mask. If `None`,
        then 25 or the number of pixels in the mask will be used, whichever
        is smaller.
    n_components_second : int
        Number of second-order PLD components to reduce to with PCA.
    use_gp : boolean
        Option to turn GP fitting on or off.

    Returns
    -------
    corrected_lightcurve : `~lightkurve.lightcurve.LightCurve`
        Returns a corrected lightcurve object. Depending on the input,
        the returned object will be a `KeplerLightCurve`, `TessLightCurve`,
        or general `LightCurve` object. `None` is returned when `celerite`
        or `scikit-learn` cannot be imported.
    """
    # Verify optional dependencies
    try:
        import celerite
    except ImportError:
        log.error("PLD requires the `celerite` Python package. "
                  "See the installation instructions at "
                  "https://docs.lightkurve.org/about/install.html")
        return None
    try:
        from sklearn.decomposition import PCA
    except ImportError:
        log.error("PLD requires the `scikit-learn` Python package. "
                  "See the installation instructions at "
                  "https://docs.lightkurve.org/about/install.html")
        return None

    # Parse the aperture mask to accept strings etc.
    aperture = self.tpf._parse_aperture_mask(aperture_mask)

    # n_components_first cannot be larger than the number of pixels in the
    # mask
    if n_components_first is None:
        n_components_first = min(25, (aperture > 0).sum())

    # find pixel bounds of aperture on tpf
    ap_rows, ap_cols = np.where(aperture)[:2]
    xmin, xmax = min(ap_rows), max(ap_rows)
    ymin, ymax = min(ap_cols), max(ap_cols)

    # crop data cube to include only desired pixels
    # this is required for superstamps to ensure matrix is invertable
    flux_crop = self.flux[:, xmin:xmax + 1, ymin:ymax + 1]
    flux_err_crop = self.flux_err[:, xmin:xmax + 1, ymin:ymax + 1]
    aperture_crop = aperture[xmin:xmax + 1, ymin:ymax + 1]

    # calculate errors (ignore warnings related to zero or negative errors)
    with warnings.catch_warnings():
        warnings.simplefilter("ignore", RuntimeWarning)
        flux_err = np.nansum(flux_err_crop[:, aperture_crop]**2,
                             axis=1)**0.5

    # set default transit mask
    if cadence_mask is None:
        cadence_mask = np.where(self.time)

    def M(x):
        # Keep only the cadences used to build the noise model.
        return x[cadence_mask]

    # generate flux light curve from desired pixels
    lc = self.tpf.to_lightcurve(aperture_mask=aperture)

    # set aperture values
    aperture_vals = np.copy(aperture_crop).astype(int)

    # `aperture_flux` contains the per-pixel lightcurve in a matrix
    # with shape (n_cadences, n_pixels).
    # We will run PCA on this matrix further below to arrive at the design
    # matrix for the noise model.
    self.aperture_flux = np.array([f * aperture_vals for f in flux_crop
                                   ]).reshape(len(flux_crop), -1)
    rawflux = np.sum(self.aperture_flux, axis=1)

    # first order PLD
    f1 = self.aperture_flux / rawflux.reshape(-1, 1)
    X1 = PCA(n_components=n_components_first).fit_transform(f1)

    # second order PLD
    # np.prod is used because the np.product alias was removed in NumPy 2.0.
    f2 = np.prod(list(multichoose(f1.T, 2)), axis=1).T
    X2 = PCA(n_components=n_components_second).fit_transform(f2)

    # Create the design matrix X by stacking X1 and X2 and adding a column
    # vector of 1s for numerical stability (see Luger et al.). The stacking
    # is horizontal, so X has one row per cadence.
    X = np.hstack([np.ones(X1.shape[0]).reshape(-1, 1), X1, X2])

    # mask transits in design matrix
    MX = M(X)
    masked_flux = M(rawflux)

    # White-noise covariance: squared per-cadence sum of the cropped pixel
    # errors on the diagonal. It is shared by both branches below.
    masked_err = M(flux_err_crop)
    white_noise = np.diag(
        np.sum(masked_err.reshape(len(masked_err), -1), axis=1)**2)

    if use_gp:
        # We use a Gaussian Process to model the long term trend.
        # We do this by estimating the long term trend y by applying the
        # preliminary PLD model defined above and subtracting it from the
        # raw light curve. The "in transit" cadences are masked out in this
        # step to prevent the long term approximation from over-fitting the
        # transits.
        y = masked_flux - np.dot(
            MX,
            np.linalg.solve(np.dot(MX.T, MX), np.dot(MX.T, masked_flux)))

        # Estimate the amplitude parameter of a Matern-3/2 kernel GP
        # by computing the standard deviation of y.
        amp = np.nanstd(y)
        tau = gp_timescale  # tau is a user-defined parameter

        # set up gaussian process using celerite
        # we use a Matern-3/2 kernel for its flexibility and non-periodicity
        kernel = celerite.terms.Matern32Term(np.log(amp), np.log(tau))
        gp = celerite.GP(kernel)

        # recover GP covariance matrix from celerite model
        # sigma is expected to have shape
        # (n_unmasked_cadences, n_unmasked_cadences)
        sigma = gp.get_matrix(M(self.time)) + white_noise
    else:
        sigma = white_noise

    # compute the coefficients C on the basis vectors;
    # the PLD design matrix will be dotted with C to solve for the noise
    # model.
    A = np.dot(MX.T, np.linalg.solve(sigma, MX))
    B = np.dot(MX.T, np.linalg.solve(sigma, masked_flux))
    C = np.linalg.solve(A, B)

    # compute detrended light curve
    model = np.dot(X, C)
    self.detrended_flux = rawflux - (model - np.nanmean(model))

    # Create and return a new LightCurve object with the corrected flux
    corrected_lc = lc.copy()
    corrected_lc.flux = self.detrended_flux
    corrected_lc.flux_err = flux_err
    return corrected_lc
def create_design_matrix(self,
                         pld_order=3,
                         pca_components=16,
                         background_aperture_mask='background',
                         pld_aperture_mask=None,
                         spline_n_knots=100,
                         spline_degree=3,
                         sparse=False):
    """Returns a `.DesignMatrixCollection` containing a `DesignMatrix` object
    for the background regressors, the PLD pixel component regressors, and
    the spline regressors.

    Side effects: stores the parsed masks on ``self.pld_aperture_mask`` and
    ``self.background_aperture_mask`` and the background estimate on
    ``self.background_estimate``.

    Parameters
    ----------
    pld_order : int
        The order of Pixel Level De-correlation to be performed. First order
        (`n=1`) uses only the pixel fluxes to construct the design matrix.
        Higher order populates the design matrix with columns constructed
        from the products of pixel fluxes.
    pca_components : int or tuple of int
        Number of terms added to the design matrix for each order of PLD
        pixel fluxes. Increasing this value may provide higher precision
        at the expense of slower speed and/or overfitting.
        If performing PLD with `pld_order > 1`, `pca_components` can be
        a tuple containing the number of terms for each order of PLD; its
        first entry is also used for the initial reduction of the pixel
        regressors.
        If a single int is passed, the same number of terms will be used
        for each order. If zero is passed, PCA will not be performed.
    background_aperture_mask : array-like or None
        A boolean array flagging the background pixels such that `True`
        means that the pixel will be used to generate the background
        systematics model.
        If `None`, all pixels which are fainter than 1-sigma above the
        median flux will be used.
    pld_aperture_mask : array-like, 'pipeline', 'all', 'threshold', or None
        A boolean array describing the aperture such that `True` means
        that the pixel will be used when selecting the PLD basis vectors.
        If `None` or `all` are passed in, all pixels will be used.
        If 'pipeline' is passed, the mask suggested by the official pipeline
        will be returned.
        If 'threshold' is passed, all pixels brighter than 3-sigma above
        the median flux will be used.
    spline_n_knots : int
        Number of knots in spline.
    spline_degree : int
        Polynomial degree of spline.
    sparse : bool
        Whether to create `SparseDesignMatrix`.

    Returns
    -------
    dm : `.DesignMatrixCollection`
        `.DesignMatrixCollection` containing pixel, background, and spline
        components.
    """

    def _n_components(order):
        # `pca_components` may be one int shared by all orders or a
        # sequence with one entry per PLD order.
        if isinstance(pca_components, (tuple, list)):
            return pca_components[order - 1]
        return pca_components

    # Validate the inputs
    pld_aperture_mask = self.tpf._parse_aperture_mask(pld_aperture_mask)
    background_aperture_mask = self.tpf._parse_aperture_mask(
        background_aperture_mask)
    self.pld_aperture_mask = pld_aperture_mask
    self.background_aperture_mask = background_aperture_mask

    if sparse:
        DMC = SparseDesignMatrixCollection
        spline = create_sparse_spline_matrix
    else:
        DMC = DesignMatrixCollection
        spline = create_spline_matrix

    # First, we estimate the per-pixel background flux over time
    bkg = self.tpf.estimate_background(
        aperture_mask=background_aperture_mask)
    self.background_estimate = bkg

    # Background-subtracted, flux-normalized pixel time series
    regressors = self.tpf.flux[:, pld_aperture_mask].reshape(
        len(self.tpf.flux), -1)
    regressors = regressors - bkg.flux.reshape(
        -1, 1) * pld_aperture_mask.sum() * u.pixel
    regressors = np.array([r[np.isfinite(r)] for r in regressors])
    regressors = np.array(
        [r / f for r, f in zip(regressors, self.lc.flux.value)])

    # Use the DesignMatrix infrastructure to apply PCA to the regressors.
    with warnings.catch_warnings():
        warnings.filterwarnings('ignore', message='.*low rank.*')
        regressors_dm = DesignMatrix(regressors)
    ncomp = _n_components(1)
    if ncomp > 0:
        regressors_dm = regressors_dm.pca(ncomp)
    regressors_pld = regressors_dm.values

    # Create a DesignMatrix for each PLD order
    all_pld = []
    for order in range(1, pld_order + 1):
        # np.prod is used because the np.product alias was removed in
        # NumPy 2.0.
        reg_n = np.prod(list(multichoose(regressors_pld.T, order)),
                        axis=1).T
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', message='.*low rank.*')
            pld_n = DesignMatrix(reg_n)
        # Apply PCA. Use the entry of pca_components for this order when
        # one is given, otherwise the shared value.
        ncomp = _n_components(order)
        if ncomp > 0:
            pld_n = pld_n.pca(ncomp)
        all_pld.append(pld_n)

    # Create the collection of DesignMatrix objects.
    # DesignMatrix 1 contains the PLD pixel series
    dm_pixels = DesignMatrixCollection(all_pld).to_designmatrix(
        name='pixel_series')

    # DesignMatrix 2 contains the average per-pixel background flux
    # The prior on the background flux is set equal to the number of pixels
    # in the light curve aperture mask; this assumes the background is
    # additive.
    bkg_prior_mu = self.tpf._parse_aperture_mask(
        self.lc.meta['aperture_mask']).sum()
    dm_bkg = DesignMatrix(bkg.flux.value,
                          name='background_model',
                          prior_mu=bkg_prior_mu,
                          prior_sigma=1)

    # DesignMatrix 3 contains splines plus a constant
    dm_spline = spline(self.lc.time.value,
                       n_knots=spline_n_knots,
                       degree=spline_degree).append_constant()

    with warnings.catch_warnings():
        warnings.filterwarnings(
            'ignore',
            message='.*Not all matrices are `SparseDesignMatrix` objects..*'
        )
        dm_collection = DMC([dm_pixels, dm_bkg, dm_spline])
    return dm_collection