def X(self, i, j=slice(None, None, None)):
    """Compute the PLD design matrix of order ``i + 1`` at indices ``j``.

    The pixel fluxes ``self.fpix[j]`` are divided by ``self.norm[j]`` to
    obtain fractional pixel fluxes. The columns of the result are the
    products of the fractional fluxes over every group of ``i + 1`` pixels
    produced by ``multichoose``.

    Parameters
    ----------
    i : int
        Zero-based PLD order index; the matrix is built at order ``i + 1``.
    j : slice or index array
        Indices selecting the rows of ``self.fpix`` / ``self.norm`` to use.
        Defaults to all rows.

    Returns
    -------
    numpy.ndarray
        The design matrix. When ``self.X1N`` is not ``None``, the columns
        ``self.X1N[j] ** (i + 1)`` are appended on the right.
    """
    # Fractional pixel fluxes: each row is divided by its normalisation.
    X1 = self.fpix[j] / self.norm[j].reshape(-1, 1)
    # `np.prod` is used because the `np.product` alias was removed in NumPy 2.0.
    X = np.prod(list(multichoose(X1.T, i + 1)), axis=1).T
    if self.X1N is not None:
        return np.hstack([X, self.X1N[j] ** (i + 1)])
    return X
Exemple #2
0
    def perform_PLD(self, fpix, motion, mask):
        '''
        Perform PLD on a light curve.

        The design matrix is built from a constant column, 20 PCA
        components of the first-order fractional pixel fluxes, 10 PCA
        components of the second-order products, and extra regressors
        loaded from ``masks/larger_aperture/X10_<motion>.npz``.

        Parameters
        ----------
        fpix : numpy.ndarray
            Pixel fluxes with cadences along the first axis. NaN values are
            treated as zero. The caller's array is not modified.
        motion : int
            Identifier formatted into the file name of the extra regressors.
        mask : int, slice or sequence of int
            Cadence indices removed (via ``np.delete`` along axis 0) before
            the model is built.

        Returns
        -------
        detrended : numpy.ndarray
            ``flux - model + nanmean(flux)``.
        flux : numpy.ndarray
            Raw light curve: the pixel fluxes summed per cadence.
        '''

        def outM(x):
            # Drop the masked cadences (rows) from `x`.
            return np.delete(x, mask, axis=0)

        # Replace NaNs with zeros on a fresh array instead of writing into
        # the caller's `fpix` in place.
        fpix = np.where(np.isnan(fpix), 0, fpix)

        # Generate the flux light curve
        fpix = outM(fpix)
        fpix_rs = fpix.reshape(len(fpix), -1)
        flux = np.sum(fpix_rs, axis=1)

        # First order PLD
        f1 = fpix_rs / flux.reshape(-1, 1)
        pca = PCA(n_components=20)
        X1 = pca.fit_transform(f1)

        # Second order PLD
        # (`np.prod` replaces `np.product`, removed in NumPy 2.0.)
        f2 = np.prod(list(multichoose(f1.T, 2)), axis=1).T
        pca = PCA(n_components=10)
        X2 = pca.fit_transform(f2)

        # Load the extra regressors; the context manager closes the archive.
        with np.load('masks/larger_aperture/X10_%i.npz' % motion) as archive:
            X10 = archive['X']
        # Drop the first entry of every row, then remove the masked cadences.
        X10crop = np.array(outM([row[1:] for row in X10]))

        # Combine them and add a column vector of 1s for stability
        X3 = np.hstack([np.ones(X1.shape[0]).reshape(-1, 1), X1, X2])
        X = np.concatenate((X3, X10crop), axis=1)

        MX = self.M(X)

        # Solve the normal equations (MX^T MX) C = MX^T M(flux)
        A = np.dot(MX.T, MX)
        B = np.dot(MX.T, self.M(flux))
        C = np.linalg.solve(A, B)

        # Compute the detrended light curve
        model = np.dot(X, C)
        detrended = flux - model + np.nanmean(flux)

        return detrended, flux
Exemple #3
0
    def X(self, i, j=slice(None, None, None)):
        '''
        Computes the design matrix at the given *PLD* order and the given
        indices. The columns are the *PLD* vectors for the target at the
        corresponding order, computed as the product of the fractional pixel
        flux of all sets of :py:obj:`n` pixels, where :py:obj:`n` is the *PLD*
        order.

        :param int i: Zero-based order index; the matrix is built at \
               *PLD* order ``i + 1``
        :param j: Indices selecting the rows of ``self.fpix`` and \
               ``self.norm`` to use. Default is all rows
        :returns: The design matrix, with ``self.X1N[j] ** (i + 1)`` \
               appended as extra columns when ``self.X1N`` is not ``None``

        '''

        # Fractional pixel fluxes
        X1 = self.fpix[j] / self.norm[j].reshape(-1, 1)
        # `np.prod` replaces `np.product`, which was removed in NumPy 2.0
        X = np.prod(list(multichoose(X1.T, i + 1)), axis=1).T
        if self.X1N is not None:
            return np.hstack([X, self.X1N[j] ** (i + 1)])
        return X
Exemple #4
0
    def X(self, i, j=slice(None, None, None)):
        '''
        Computes the design matrix at the given *PLD* order and the given
        indices. The columns are the *PLD* vectors for the target at the
        corresponding order, computed as the product of the fractional pixel
        flux of all sets of :py:obj:`n` pixels, where :py:obj:`n` is the *PLD*
        order.

        :param int i: Zero-based order index; the matrix is built at \
               *PLD* order ``i + 1``
        :param j: Indices selecting the rows of ``self.fpix`` and \
               ``self.norm`` to use. Default is all rows
        :returns: The design matrix, with ``self.X1N[j] ** (i + 1)`` \
               appended as extra columns when ``self.X1N`` is not ``None``

        '''

        # Normalise the selected pixel fluxes row by row
        X1 = self.fpix[j] / self.norm[j].reshape(-1, 1)
        # `np.product` was removed in NumPy 2.0; `np.prod` is the same function
        X = np.prod(list(multichoose(X1.T, i + 1)), axis=1).T
        if self.X1N is not None:
            return np.hstack([X, self.X1N[j] ** (i + 1)])
        return X
    def create_design_matrix(
        self,
        pld_order=3,
        pca_components=16,
        pld_aperture_mask=None,
        background_aperture_mask="background",
        spline_n_knots=None,
        spline_degree=3,
        normalize_background_pixels=None,
        sparse=False,
    ):
        """Returns a `.DesignMatrixCollection` containing a `DesignMatrix` object
        for the background regressors, the PLD pixel component regressors, and
        the spline regressors.

        The pixel component is only included when `pld_aperture_mask` selects
        at least one pixel.

        Parameters
        ----------
        pld_order : int
            The order of Pixel Level De-correlation to be performed. First order
            (`n=1`) uses only the pixel fluxes to construct the design matrix.
            Higher order populates the design matrix with columns constructed
            from the products of pixel fluxes.
        pca_components : int
            Number of terms added to the design matrix for each order of PLD
            pixel fluxes. Increasing this value may provide higher precision
            at the expense of slower speed and/or overfitting.
            If zero is passed, PCA will not be performed on the PLD
            regressors. Defaults to 16.
            NOTE(review): the value is compared with ``> 0`` and passed
            unchanged to `DesignMatrix.pca`, so only a single int is handled
            here; the background matrix calls `pca(pca_components)` even
            when the value is zero -- confirm that is intended.
        pld_aperture_mask : array-like, 'pipeline', 'all', 'threshold', or None
            A boolean array describing the aperture such that `True` means
            that the pixel will be used when selecting the PLD basis vectors.
            If `None` or `all` are passed in, all pixels will be used.
            If 'pipeline' is passed, the mask suggested by the official pipeline
            will be returned.
            If 'threshold' is passed, all pixels brighter than 3-sigma above
            the median flux will be used.
        background_aperture_mask : array-like or str
            Mask selecting the pixels used to build the background
            regressors. It is parsed by `self.tpf._parse_aperture_mask`.
        spline_n_knots : int or None
            Number of knots in spline. If `None`, one knot per 50 data
            points of `self.lc` is used.
        spline_degree : int
            Polynomial degree of spline.
        normalize_background_pixels : bool or None
            If truthy, the background pixels of each cadence are divided by
            the summed background flux of that cadence. Otherwise the raw
            pixel values are used.
        sparse : bool
            Whether to create `SparseDesignMatrix`.

        Returns
        -------
        dm : `.DesignMatrixCollection`
            `.DesignMatrixCollection` containing pixel, background, and spline
            components.
        """
        # Validate the inputs and remember the parsed masks on the instance
        pld_aperture_mask = self.tpf._parse_aperture_mask(pld_aperture_mask)
        self.pld_aperture_mask = pld_aperture_mask
        background_aperture_mask = self.tpf._parse_aperture_mask(
            background_aperture_mask)
        self.background_aperture_mask = background_aperture_mask

        if spline_n_knots is None:
            # Default to a spline per 50 data points
            spline_n_knots = int(len(self.lc) / 50)

        if sparse:
            DMC = SparseDesignMatrixCollection
            spline = create_sparse_spline_matrix
        else:
            DMC = DesignMatrixCollection
            spline = create_spline_matrix

        # We set the width of all coefficient priors to 10 times the standard
        # deviation to prevent the fit from going crazy.
        prior_sigma = np.nanstd(self.lc.flux.value) * 10

        # Background pixel time series, optionally flux-normalized per cadence
        bkg_pixels = self.tpf.flux[:, background_aperture_mask].reshape(
            len(self.tpf.flux), -1)
        if normalize_background_pixels:
            bkg_flux = np.nansum(self.tpf.flux[:, background_aperture_mask],
                                 -1)
            bkg_pixels = np.array(
                [r / f for r, f in zip(bkg_pixels, bkg_flux)])
        else:
            bkg_pixels = bkg_pixels.value

        # Remove NaNs
        bkg_pixels = np.array([r[np.isfinite(r)] for r in bkg_pixels])

        # Create background design matrix
        dm_bkg = DesignMatrix(bkg_pixels, name="background")
        # Apply PCA
        dm_bkg = dm_bkg.pca(pca_components)
        # Set prior sigma to 10 * standard deviation
        dm_bkg.prior_sigma = np.ones(dm_bkg.shape[1]) * prior_sigma

        # Create a design matrix containing splines plus a constant
        dm_spline = spline(self.lc.time.value,
                           n_knots=spline_n_knots,
                           degree=spline_degree).append_constant()
        # Set prior sigma to 10 * standard deviation
        dm_spline.prior_sigma = np.ones(dm_spline.shape[1]) * prior_sigma

        # Matrices that are always part of the returned collection
        matrices = [dm_bkg, dm_spline]

        # Create a PLD matrix if there are pixels in the pld_aperture_mask
        if np.sum(pld_aperture_mask) != 0:
            # Flux normalize the PLD components
            pld_pixels = self.tpf.flux[:, pld_aperture_mask].reshape(
                len(self.tpf.flux), -1)
            pld_pixels = np.array(
                [r / f for r, f in zip(pld_pixels, self.lc.flux.value)])
            # Remove NaNs
            pld_pixels = np.array([r[np.isfinite(r)] for r in pld_pixels])

            # Use the DesignMatrix infrastructure to apply PCA to the regressors.
            regressors_dm = DesignMatrix(pld_pixels)
            if pca_components > 0:
                regressors_dm = regressors_dm.pca(pca_components)
            regressors_pld = regressors_dm.values

            # Create a DesignMatrix for each PLD order
            all_pld = []
            for order in range(1, pld_order + 1):
                # `np.prod` replaces `np.product`, removed in NumPy 2.0.
                reg_n = np.prod(list(multichoose(regressors_pld.T, order)),
                                axis=1).T
                pld_n = DesignMatrix(
                    reg_n,
                    prior_sigma=np.ones(reg_n.shape[1]) * prior_sigma /
                    reg_n.shape[1],
                    name=f"pld_order_{order}",
                )
                # Apply PCA.
                if pca_components > 0:
                    pld_n = pld_n.pca(pca_components)
                    # Calling pca() resets the priors, so we set them again.
                    pld_n.prior_sigma = (np.ones(pld_n.shape[1]) *
                                         prior_sigma / pca_components)
                all_pld.append(pld_n)

            # Merge the per-order matrices into one "pixel_series" matrix,
            # which goes first in the returned collection.
            dm_pixels = DesignMatrixCollection(all_pld).to_designmatrix(
                name="pixel_series")
            matrices.insert(0, dm_pixels)

        # Build the collection, silencing the warning about mixing sparse
        # and dense matrices.
        with warnings.catch_warnings():
            warnings.filterwarnings(
                "ignore",
                message=
                ".*Not all matrices are `SparseDesignMatrix` objects..*",
            )
            dm_collection = DMC(matrices)
        return dm_collection
Exemple #6
0
def PLD2(time, flux, ferr, lc, ap, n=None, mask=None, gp_timescale=30):
    """Detrend a pixel time series with second-order PLD and a GP noise model.

    Parameters
    ----------
    time : numpy.ndarray
        Cadence times.
    flux : numpy.ndarray
        Pixel fluxes, indexed as ``flux[cadence, x, y]``.
    ferr : numpy.ndarray
        Pixel flux errors, same layout as ``flux``.
    lc : object
        Not used by this function; kept so existing callers keep working.
    ap : numpy.ndarray
        2-D aperture mask; truthy entries select the pixels to use.
    n : int or None
        Number of PCA components kept for each PLD order. Defaults to
        ``min(20, ap.sum())``.
    mask : index or boolean array, or None
        Cadences used to fit the model. Defaults to all cadences.
    gp_timescale : float
        Time scale passed to the Matern-3/2 kernel.

    Returns
    -------
    det_flux : numpy.ndarray
        ``rawflux - (model - nanmean(model))`` for every cadence.
    flux_err : numpy.ndarray
        Per-cadence quadrature sum of the errors of the aperture pixels.
    """
    if n is None:
        n = min(20, ap.sum())

    # Bounding box of the aperture
    rows, cols = np.where(ap)[0], np.where(ap)[1]
    xmin, xmax = min(rows), max(rows)
    ymin, ymax = min(cols), max(cols)

    flux_crop = flux[:, xmin:xmax + 1, ymin:ymax + 1]
    ferr_crop = ferr[:, xmin:xmax + 1, ymin:ymax + 1]
    ap_crop = ap[xmin:xmax + 1, ymin:ymax + 1]

    flux_err = np.nansum(ferr_crop[:, ap_crop]**2, axis=1)**0.5

    if mask is None:
        # Keep every cadence. `np.where(time)` would silently drop any
        # cadence whose time stamp is exactly zero.
        mask = np.ones(len(time), dtype=bool)

    def M(x):
        # Restrict `x` to the cadences used for fitting.
        return x[mask]

    apval = np.copy(ap_crop).astype(int)

    # Per-pixel light curves with pixels outside the aperture zeroed,
    # flattened to one row per cadence.
    ap_flux = np.array([f * apval
                        for f in flux_crop]).reshape(len(flux_crop), -1)
    rawflux = np.sum(ap_flux, axis=1)

    # First order PLD
    f1 = ap_flux / rawflux.reshape(-1, 1)
    pca = PCA(n_components=n)
    X1 = pca.fit_transform(f1)

    # Second order PLD (`np.prod` replaces `np.product`, removed in NumPy 2.0)
    f2 = np.prod(list(multichoose(f1.T, 2)), axis=1).T
    pca = PCA(n_components=n)
    X2 = pca.fit_transform(f2)

    # Design matrix: constant column + first- and second-order components
    X = np.hstack([np.ones(X1.shape[0]).reshape(-1, 1), X1, X2])
    MX = M(X)
    masked_flux = M(rawflux)

    # Residual of a preliminary least-squares fit, used to set the GP amplitude
    y = masked_flux - np.dot(
        MX, np.linalg.solve(np.dot(MX.T, MX), np.dot(MX.T, masked_flux)))
    amp = np.nanstd(y)
    tau = gp_timescale
    ker = celerite.terms.Matern32Term(np.log(amp), np.log(tau))
    gp = celerite.GP(ker)

    # Covariance: GP kernel plus a diagonal term from the pixel errors
    # NOTE(review): the diagonal is (sum of errors over the cropped stamp)**2,
    # unlike the quadrature sum used for `flux_err` -- confirm this is intended.
    masked_ferr = M(ferr_crop)
    sigma = gp.get_matrix(M(time)) + np.diag(
        np.sum(masked_ferr.reshape(len(masked_ferr), -1), axis=1)**2)

    # Generalised least squares for the regressor weights
    A = np.dot(MX.T, np.linalg.solve(sigma, MX))
    B = np.dot(MX.T, np.linalg.solve(sigma, masked_flux))
    C = np.linalg.solve(A, B)

    model = np.dot(X, C)
    det_flux = rawflux - (model - np.nanmean(model))

    return det_flux, flux_err
Exemple #7
0
    def correct(self,
                aperture_mask=None,
                cadence_mask=None,
                gp_timescale=30,
                use_gp=True,
                pld_order=2,
                n_pca_terms=10,
                pld_aperture_mask=None):
        r"""Returns a PLD systematics-corrected LightCurve.

        Parameters
        ----------
        aperture_mask : array-like, 'pipeline', 'all', 'threshold', or None
            A boolean array describing the aperture such that `True` means
            that the pixel will be used to generate the raw flux light curve.
            If `None` or 'all' are passed, all pixels will be used.
            If 'pipeline' is passed, the mask suggested by the official pipeline
            will be returned.
            If 'threshold' is passed, all pixels brighter than 3-sigma above
            the median flux will be used.
        cadence_mask : array-like
            A mask that will be applied to the cadences prior to constructing
            the detrending model. For example, you can pass a boolean array
            of length `n_cadences` where `True` means that the cadence will be
            included in the noise model. You may also pass an array of indices.
            This option enables signals of interest (e.g. planet transits)
            to be excluded from the noise model, which will prevent over-fitting.
            By default, no cadences will be masked.
        gp_timescale : float
            Gaussian Process time scale length term (`tau`) used to define
            length of fit variability in days.
        use_gp : boolean
            Option to turn GP fitting on or off.  You would typically only set
            this to False to speed up the correction (at the cost of precision),
            or if you suspect the presence of systematic noise at long timescales.
            If the `celerite` package cannot be imported, the GP is turned
            off and an error is logged.
        pld_order : int
            The order of Pixel Level De-correlation to be performed. First order
            (`n=1`) uses only the pixel fluxes to construct the design matrix.
            Higher order populates the design matrix with columns constructed
            from the products of pixel fluxes.
        n_pca_terms : int
            Number of terms added to the design matrix from each order of PLD
            when performing Principal Component Analysis for models higher than
            first order. Increasing this value may provide higher precision at
            the expense of computational time.
        pld_aperture_mask : array-like, 'pipeline', 'all', 'threshold', or None
            A boolean array describing the aperture such that `True` means
            that the pixel will be used when selecting the PLD basis vectors.
            If `None` or `all` are passed in, all pixels will be used.
            If 'pipeline' is passed, the mask suggested by the official pipeline
            will be returned.
            If 'threshold' is passed, all pixels brighter than 3-sigma above
            the median flux will be used.

        Returns
        -------
        corrected_lightcurve : `~lightkurve.lightcurve.LightCurve`
            Returns a corrected lightcurve object. Depending on the input, the
            returned object will be a `KeplerLightCurve`, `TessLightCurve`, or
            general `LightCurve` object.
        """
        if use_gp:
            # Verify optional dependency
            try:
                import celerite
            except ImportError:
                log.error("PLD uses the `celerite` Python package. "
                          "See the installation instructions at "
                          "https://docs.lightkurve.org/about/install.html. "
                          "`use_gp` has been set to `False`.")
                use_gp = False

        # Parse the aperture mask to accept strings etc.
        aperture = self.tpf._parse_aperture_mask(aperture_mask)

        # generate flux light curve from desired pixels
        lc = self.tpf.to_lightcurve(aperture_mask=aperture)
        rawflux = lc.flux.value
        rawflux_err = lc.flux_err.value

        # create nan mask (True for cadences that are kept)
        nanmask = np.isfinite(self.time)
        nanmask &= np.isfinite(rawflux)
        nanmask &= np.isfinite(rawflux_err)
        nanmask &= np.abs(rawflux_err) > 1e-12

        # mask out nan values
        # NOTE(review): this overwrites self.flux / self.flux_err / self.time
        # with the filtered arrays, so a second call on the same object works
        # on already-shortened data -- confirm callers expect that.
        rawflux = rawflux[nanmask]
        rawflux_err = rawflux_err[nanmask]
        self.flux = self.flux[nanmask]
        self.flux_err = self.flux_err[nanmask]
        self.time = self.time[nanmask]

        # parse the PLD aperture mask
        pld_pixel_mask = self.tpf._parse_aperture_mask(pld_aperture_mask)

        # find pixel bounds of aperture on tpf
        xmin, xmax = min(np.where(pld_pixel_mask)[0]), max(
            np.where(pld_pixel_mask)[0])
        ymin, ymax = min(np.where(pld_pixel_mask)[1]), max(
            np.where(pld_pixel_mask)[1])

        # crop data cube to include only desired pixels
        # this is required for superstamps to ensure matrix is invertable
        flux_crop = self.flux[:, xmin:xmax + 1, ymin:ymax + 1]
        flux_err_crop = self.flux_err[:, xmin:xmax + 1, ymin:ymax + 1]
        aperture_crop = pld_pixel_mask[xmin:xmax + 1, ymin:ymax + 1]

        # calculate errors (ignore warnings related to zero or negative errors)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            flux_err = np.nansum(flux_err_crop[:, aperture_crop]**2,
                                 axis=1)**0.5

        # first order PLD design matrix
        pld_flux = flux_crop[:, aperture_crop]
        f1 = np.reshape(pld_flux, (len(pld_flux), -1))
        X1 = f1 / np.nansum(pld_flux, axis=-1)[:, None]
        # No NaN pixels
        X1 = X1[:, np.isfinite(X1).all(axis=0)]

        # higher order PLD design matrices
        X_sections = [np.ones((len(flux_crop), 1)), X1]
        for order in range(2, pld_order + 1):
            # Build the products for THIS order. Using `pld_order` here would
            # repeat the highest order and never build the intermediate ones.
            # `np.prod` replaces `np.product`, removed in NumPy 2.0.
            f_n = np.prod(list(multichoose(X1.T, order)), axis=1).T
            try:
                # We use an optional dependency for very fast PCA (fbpca).
                # If the import fails we will fall back on using the slower `np.linalg.svd`
                from fbpca import pca
                components, _, _ = pca(f_n, n_pca_terms)
            except ImportError:
                log.error("PLD uses the `fbpca` package. You can pip install "
                          "with `pip install fbpca`. Using `np.linalg.svd` "
                          "instead.")
                # Only the leading left singular vectors are used, so the
                # reduced decomposition is sufficient.
                components, _, _ = np.linalg.svd(f_n, full_matrices=False)
            X_sections.append(components[:, :n_pca_terms])

        # Create the design matrix X by stacking X1 and higher order components, and
        # adding a column vector of 1s for numerical stability (see Luger et al.).
        # X has one row per cadence and one column per regressor.
        X = np.concatenate(X_sections, axis=1)

        # set default transit mask
        if cadence_mask is None:
            cadence_mask = np.ones_like(lc.time, dtype=bool)
        else:
            cadence_mask = np.asarray(cadence_mask)
            if cadence_mask.dtype != bool:
                # An array of cadence indices: turn it into a boolean mask
                # over the unfiltered cadences so `nanmask` can index it.
                selected = np.zeros(len(nanmask), dtype=bool)
                selected[cadence_mask] = True
                cadence_mask = selected

        # Cadence mask restricted to the cadences that survived `nanmask`
        fit_mask = cadence_mask[nanmask]

        def M(x):
            return x[fit_mask]

        # mask transits in design matrix
        MX = M(X)

        if use_gp:
            # We use a Gaussian Process to model the long term trend.
            # We do this by estimating the long term trend y by applying the
            # preliminary PLD model defined above and subtracting it from the raw light curve.
            # The "in transit" cadences are masked out in this step to prevent the
            # long term approximation from over-fitting the transits.
            XTX = np.dot(MX.T, MX)
            XTX[np.diag_indices_from(XTX)] += 1e-8
            XTy = np.dot(MX.T, M(rawflux))
            y = M(rawflux) - np.dot(MX, np.linalg.solve(XTX, XTy))

            # Estimate the amplitude parameter of a Matern-3/2 kernel GP
            # by computing the standard deviation of y.
            amp = np.nanstd(y)
            tau = gp_timescale  # tau is a user-defined parameter
            # set up gaussian process using celerite
            # we use a Matern-3/2 kernel for its flexibility and non-periodicity
            kernel = celerite.terms.Matern32Term(np.log(amp), np.log(tau))
            gp = celerite.GP(kernel)
            gp.compute(M(self.time), M(rawflux_err))

            # compute the coefficients C on the basis vectors;
            # the PLD design matrix will be dotted with C to solve for the noise model.
            A = np.dot(MX.T, gp.apply_inverse(MX))
            B = np.dot(MX.T, gp.apply_inverse(M(rawflux)[:, None])[:, 0])

        else:
            # compute the coefficients C on the basis vectors;
            # the PLD design matrix will be dotted with C to solve for the noise model.
            ivar = 1.0 / M(rawflux_err)**2  # inverse variance
            A = np.dot(MX.T, MX * ivar[:, None])
            B = np.dot(MX.T, M(rawflux) * ivar)

        # apply prior to design matrix weights for numerical stability
        A[np.diag_indices_from(A)] += 1e-8
        C = np.linalg.solve(A, B)

        # compute detrended light curve
        model = np.dot(X, C)
        self.detrended_flux = rawflux - (model - np.nanmean(model))

        # Create and return a new LightCurve object with the corrected flux
        corrected_lc = lc.copy()[nanmask]
        corrected_lc.flux = self.detrended_flux
        corrected_lc.flux_err = flux_err.value
        return corrected_lc
Exemple #8
0
    def correct(self,
                aperture_mask=None,
                cadence_mask=None,
                gp_timescale=30,
                n_components_first=None,
                n_components_second=20,
                use_gp=True):
        r"""Returns a PLD systematics-corrected LightCurve.

        Parameters
        ----------
        aperture_mask : array-like, 'pipeline', 'all', 'threshold', or None
            A boolean array describing the aperture such that `True` means
            that the pixel will be used.
            If `None` or 'all' are passed, all pixels will be used.
            If 'pipeline' is passed, the mask suggested by the official pipeline
            will be returned.
            If 'threshold' is passed, all pixels brighter than 3-sigma above
            the median flux will be used.
        cadence_mask : array-like
            A mask that will be applied to the cadences prior to constructing
            the detrending model. For example, you can pass a boolean array
            of length `n_cadences` where `True` means that the cadence will be
            included in the noise model. You may also pass an array of indices.
            This option enables signals of interest (e.g. planet transits)
            to be excluded from the noise model, which will prevent over-fitting.
            By default, no cadences will be masked.
        gp_timescale : float
            Gaussian Process time scale length term (`tau`) used to define
            length of fit variability in days.
        n_components_first : int
            Number of first-order PLD components to reduce to with PCA.
            Must be smaller than the number of pixels in the aperture mask.
            If `None`, then 25 or the number of pixels in the mask will be used,
            whichever is smaller.
        n_components_second : int
            Number of second-order PLD components to reduce to with PCA.
        use_gp : boolean
            Option to turn GP fitting on or off. The `celerite` package is
            only needed when this is `True`.

        Returns
        -------
        corrected_lightcurve : `~lightkurve.lightcurve.LightCurve`
            Returns a corrected lightcurve object. Depending on the input, the
            returned object will be a `KeplerLightCurve`, `TessLightCurve`, or
            general `LightCurve` object.
            `None` is returned (after logging an error) when a required
            optional dependency cannot be imported.
        """
        # Verify optional dependencies; celerite is only used by the GP branch
        if use_gp:
            try:
                import celerite
            except ImportError:
                log.error("PLD requires the `celerite` Python package. "
                          "See the installation instructions at "
                          "https://docs.lightkurve.org/about/install.html")
                return None
        try:
            from sklearn.decomposition import PCA
        except ImportError:
            log.error("PLD requires the `scikit-learn` Python package. "
                      "See the installation instructions at "
                      "https://docs.lightkurve.org/about/install.html")
            return None

        # Parse the aperture mask to accept strings etc.
        aperture = self.tpf._parse_aperture_mask(aperture_mask)

        # n_components_first cannot be larger than the number of pixels in the mask
        if n_components_first is None:
            n_components_first = min(25, (aperture > 0).sum())

        # find pixel bounds of aperture on tpf
        xmin, xmax = min(np.where(aperture)[0]), max(np.where(aperture)[0])
        ymin, ymax = min(np.where(aperture)[1]), max(np.where(aperture)[1])

        # crop data cube to include only desired pixels
        # this is required for superstamps to ensure matrix is invertable
        flux_crop = self.flux[:, xmin:xmax + 1, ymin:ymax + 1]
        flux_err_crop = self.flux_err[:, xmin:xmax + 1, ymin:ymax + 1]
        aperture_crop = aperture[xmin:xmax + 1, ymin:ymax + 1]

        # calculate errors (ignore warnings related to zero or negative errors)
        with warnings.catch_warnings():
            warnings.simplefilter("ignore", RuntimeWarning)
            flux_err = np.nansum(flux_err_crop[:, aperture_crop]**2,
                                 axis=1)**0.5

        # set default transit mask: keep every cadence.
        # (`np.where(self.time)` would silently drop a cadence whose time
        # stamp is exactly zero.)
        if cadence_mask is None:
            cadence_mask = np.ones(len(self.time), dtype=bool)

        def M(x):
            # Restrict `x` to the cadences used to fit the noise model.
            return x[cadence_mask]

        # generate flux light curve from desired pixels
        lc = self.tpf.to_lightcurve(aperture_mask=aperture)

        # set aperture values
        aperture_vals = np.copy(aperture_crop).astype(int)

        # `aperture_flux` contains the per-pixel lightcurve in a matrix
        # with shape (n_cadences, n_pixels).
        # We will run PCA on this matrix further below to arrive at the design
        # matrix for the noise model.
        self.aperture_flux = np.array([f * aperture_vals for f in flux_crop
                                       ]).reshape(len(flux_crop), -1)
        rawflux = np.sum(self.aperture_flux, axis=1)

        # first order PLD
        f1 = self.aperture_flux / rawflux.reshape(-1, 1)
        pca = PCA(n_components=n_components_first)
        X1 = pca.fit_transform(f1)

        # second order PLD
        # (`np.prod` replaces `np.product`, which was removed in NumPy 2.0)
        f2 = np.prod(list(multichoose(f1.T, 2)), axis=1).T
        pca = PCA(n_components=n_components_second)
        X2 = pca.fit_transform(f2)

        # Create the design matrix X by stacking X1 and X2 and adding a column
        # vector of 1s for numerical stability (see Luger et al.).
        # X has one row per cadence and one column per regressor.
        X = np.hstack([np.ones(X1.shape[0]).reshape(-1, 1), X1, X2])

        # mask transits in design matrix
        MX = M(X)
        masked_flux = M(rawflux)

        # Diagonal noise term built from the per-pixel errors of the crop
        masked_err = M(flux_err_crop)
        white_noise = np.diag(
            np.sum(masked_err.reshape(len(masked_err), -1), axis=1)**2)

        if use_gp:
            # We use a Gaussian Process to model the long term trend.
            # We do this by estimating the long term trend y by applying the
            # preliminary PLD model defined above and subtracting it from the raw light curve.
            # The "in transit" cadences are masked out in this step to prevent the
            # long term approximation from over-fitting the transits.
            y = masked_flux - np.dot(
                MX, np.linalg.solve(np.dot(MX.T, MX),
                                    np.dot(MX.T, masked_flux)))
            # Estimate the amplitude parameter of a Matern-3/2 kernel GP
            # by computing the standard deviation of y.
            amp = np.nanstd(y)
            tau = gp_timescale  # tau is a user-defined parameter
            # set up gaussian process using celerite
            # we use a Matern-3/2 kernel for its flexibility and non-periodicity
            kernel = celerite.terms.Matern32Term(np.log(amp), np.log(tau))
            gp = celerite.GP(kernel)

            # recover GP covariance matrix from celerite model
            # sigma is expected to have shape (n_unmasked_cadences, n_unmasked_cadences)
            sigma = gp.get_matrix(M(self.time)) + white_noise
        else:
            sigma = white_noise

        # compute the coefficients C on the basis vectors;
        # the PLD design matrix will be dotted with C to solve for the noise model.
        A = np.dot(MX.T, np.linalg.solve(sigma, MX))
        B = np.dot(MX.T, np.linalg.solve(sigma, masked_flux))
        C = np.linalg.solve(A, B)  # one weight per regressor

        # compute detrended light curve
        model = np.dot(X, C)
        self.detrended_flux = rawflux - (model - np.nanmean(model))

        # Create and return a new LightCurve object with the corrected flux
        corrected_lc = lc.copy()
        corrected_lc.flux = self.detrended_flux
        corrected_lc.flux_err = flux_err
        return corrected_lc
Exemple #9
0
    def create_design_matrix(self,
                             pld_order=3,
                             pca_components=16,
                             background_aperture_mask='background',
                             pld_aperture_mask=None,
                             spline_n_knots=100,
                             spline_degree=3,
                             sparse=False):
        """Returns a `.DesignMatrixCollection` containing a `DesignMatrix` object
        for the background regressors, the PLD pixel component regressors, and
        the spline regressors.

        If the parameters `pld_order` and `pca_components` are None, their
        value will be assigned based on the mission. K2 and TESS experience
        different dominant sources of noise, and require different defaults.
        For information about how the defaults were chosen, see Pull Request #746.

        Parameters
        ----------
        pld_order : int
            The order of Pixel Level De-correlation to be performed. First order
            (`n=1`) uses only the pixel fluxes to construct the design matrix.
            Higher order populates the design matrix with columns constructed
            from the products of pixel fluxes.
        pca_components : int or tuple of int
            Number of terms added to the design matrix for each order of PLD
            pixel fluxes. Increasing this value may provide higher precision
            at the expense of slower speed and/or overfitting.
            If performing PLD with `pld_order > 1`, `pca_components` can be
            a tuple containing the number of terms for each order of PLD.
            If a single int is passed, the same number of terms will be used
            for each order. If zero is passed, PCA will not be performed.
            Defaults to 16 for K2 and 8 for TESS.
        background_aperture_mask : array-like or None
            A boolean array flagging the background pixels such that `True` means
            that the pixel will be used to generate the background systematics model.
            If `None`, all pixels which are fainter than 1-sigma above the median
            flux will be used.
        pld_aperture_mask : array-like, 'pipeline', 'all', 'threshold', or None
            A boolean array describing the aperture such that `True` means
            that the pixel will be used when selecting the PLD basis vectors.
            If `None` or `all` are passed in, all pixels will be used.
            If 'pipeline' is passed, the mask suggested by the official pipeline
            will be returned.
            If 'threshold' is passed, all pixels brighter than 3-sigma above
            the median flux will be used.
        spline_n_knots : int
            Number of knots in spline.
        spline_degree : int
            Polynomial degree of spline.
        sparse : bool
            Whether to create `SparseDesignMatrix`.

        Returns
        -------
        dm : `.DesignMatrixCollection`
            `.DesignMatrixCollection` containing pixel, background, and spline
            components.
        """
        # Validate the inputs
        pld_aperture_mask = self.tpf._parse_aperture_mask(pld_aperture_mask)
        background_aperture_mask = self.tpf._parse_aperture_mask(
            background_aperture_mask)
        self.pld_aperture_mask = pld_aperture_mask
        self.background_aperture_mask = background_aperture_mask

        if sparse:
            DMC = SparseDesignMatrixCollection
            spline = create_sparse_spline_matrix
        else:
            DMC = DesignMatrixCollection
            spline = create_spline_matrix

        # First, we estimate the per-pixel background flux over time
        bkg = self.tpf.estimate_background(
            aperture_mask=background_aperture_mask)
        self.background_estimate = bkg

        # Background-subtracted, flux-normalized pixel time series
        regressors = self.tpf.flux[:, pld_aperture_mask].reshape(
            len(self.tpf.flux), -1)
        regressors = regressors - bkg.flux.reshape(
            -1, 1) * pld_aperture_mask.sum() * u.pixel
        regressors = np.array([r[np.isfinite(r)] for r in regressors])
        regressors = np.array(
            [r / f for r, f in zip(regressors, self.lc.flux.value)])

        # Use the DesignMatrix infrastructure to apply PCA to the regressors.
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore', message='.*low rank.*')
            regressors_dm = DesignMatrix(regressors)
        if isinstance(pca_components, (tuple, list)):
            ncomp = pca_components[0]
        else:
            ncomp = pca_components
        if ncomp > 0:
            regressors_dm = regressors_dm.pca(ncomp)
        regressors_pld = regressors_dm.values

        # Create a DesignMatrix for each PLD order
        all_pld = []
        for order in range(1, pld_order + 1):
            reg_n = np.product(list(multichoose(regressors_pld.T, order)),
                               axis=1).T
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore', message='.*low rank.*')
                pld_n = DesignMatrix(reg_n)
            # Apply PCA. Check if pca_components has an entry for each order,
            # otherwise use pca_components for PCA of higher order matrices.
            if isinstance(pca_components, (tuple, list)):
                ncomp = pca_components[order - 1]
            else:
                ncomp = pca_components
            if ncomp > 0:
                pld_n = pld_n.pca(ncomp)
            all_pld.append(pld_n)

        # Create the collection of DesignMatrix objects.
        # DesignMatrix 1 contains the PLD pixel series
        dm_pixels = DesignMatrixCollection(all_pld).to_designmatrix(
            name='pixel_series')
        # DesignMatrix 2 contains the average per-pixel background flux
        # The prior on the background flux is set equal to the number of pixels
        # in the light curve aperture mask; this assumes the background is additive.
        bkg_prior_mu = self.tpf._parse_aperture_mask(
            self.lc.meta['aperture_mask']).sum()
        dm_bkg = DesignMatrix(bkg.flux.value,
                              name='background_model',
                              prior_mu=bkg_prior_mu,
                              prior_sigma=1)
        # DesignMatrix 3 contains splines plus a constant
        dm_spline = spline(self.lc.time.value,
                           n_knots=spline_n_knots,
                           degree=spline_degree).append_constant()

        with warnings.catch_warnings():
            warnings.filterwarnings(
                'ignore',
                message='.*Not all matrices are `SparseDesignMatrix` objects..*'
            )
            dm_collection = DMC([dm_pixels, dm_bkg, dm_spline])
        return dm_collection