コード例 #1
0
 def mask_outliers(self, max_sigma=5, pv=None):
     """Rebuild the data mask by clipping residuals from the model prediction.

     The model is evaluated with ``pv`` (the kernel's ``pv0`` vector when
     ``pv`` is None) and subtracted from the unmasked normalised flux.
     Inside the quality mask, a point is kept only if its residual lies
     strictly within ``max_sigma`` times the scatter returned by ``medsig``
     from the centre value returned by ``medsig``.  Points outside the
     quality mask keep the initial all-ones value.

     The resulting mask is handed to ``self.data.set_mask``.

     Returns the sum of the new mask, i.e. the number of entries that are
     still set (not the number of rejected points).
     """
     if pv is None:
         pv = self.kernel.pv0
     quality = self.data.quality_mask
     model = self.predict(pv)
     resid = self.data.unmasked_normalised_flux - model

     # medsig is presumably a robust (median, sigma) estimator -- defined
     # elsewhere in the project.
     centre, scatter = medsig(resid[quality])
     limit = max_sigma * scatter

     keep = ones_like(self.data.mask)
     keep[quality] = abs(resid[quality] - centre) < limit

     self.data.set_mask(keep)
     return keep.sum()
コード例 #2
0
    def __init__(self, flux, inputs, mask=None):
        """Store the flux, inputs and masks and compute the flux statistics.

        Parameters
        ----------
        flux : array-like
            Flux values, 1D with ``npt`` elements.
        inputs : array-like
            Input array, 2D with three columns (``[npt, 3]``).
        mask : array-like, optional
            Quality mask, 1D.  When omitted an all-True boolean mask with one
            entry per flux value is used.

        Raises
        ------
        AssertionError
            If ``flux`` is not 1D, ``inputs`` is not 2D with three columns,
            or the quality mask is not 1D.
        """
        self.flux = array(flux)
        self.inputs = array(inputs)

        # Validate the shapes before the arrays are indexed below, so that a
        # malformed input is reported with the intended message instead of an
        # unrelated indexing error.
        assert self.flux.ndim == 1, 'The flux array for DtData should be 1D [npt]'
        assert self.inputs.ndim == 2, 'The input array for DtData should be 2D [npt,3]'
        assert self.inputs.shape[
            1] == 3, 'The input array for DtData should be 2D with the shape [npt,3]'

        # The builtin ``bool`` replaces the ``np.bool`` alias, which no longer
        # exists in current NumPy releases.
        self.quality_mask = array(mask) if mask is not None else ones(
            self.flux.size, bool)
        assert self.quality_mask.ndim == 1, 'The mask array for DtData should be 1D [npt]'

        self.outlier_mask = ones(self.flux.size, bool)
        # Combined mask: a point is used only if both masks are set.
        self.mask = self.quality_mask & self.outlier_mask
        self.npt = self.flux.size
        # medsig is presumably a (median, sigma) estimator -- defined elsewhere.
        self._fm, self._fs = medsig(self.flux[self.mask])
コード例 #3
0
ファイル: dtdata.py プロジェクト: OxES/k2sc
    def __init__(self, flux, inputs, mask=None):
        """Store the flux, inputs and mask and cache the derived views.

        Parameters
        ----------
        flux : array-like
            Flux values, 1D with ``npt`` elements.
        inputs : array-like
            Input array, 2D with three columns (``[npt, 3]``).
        mask : array-like, optional
            Point mask.  When omitted an all-True boolean mask with one entry
            per flux value is used.

        Raises
        ------
        AssertionError
            If ``flux`` is not 1D or ``inputs`` is not 2D with three columns.
        """
        self._flux = array(flux)
        self._inputs = array(inputs)
        # The builtin ``bool`` replaces the ``np.bool`` alias, which no longer
        # exists in current NumPy releases.
        self._mask = array(mask) if mask is not None else ones(self._flux.size, bool)

        # Validate the shapes before any derived quantity is computed, so that
        # a malformed input is reported with the intended message.
        assert self._flux.ndim == 1, "The flux array for DtData should be 1D [npt]"
        assert self._inputs.ndim == 2, "The input array for DtData should be 2D [npt,3]"
        assert self._inputs.shape[1] == 3, "The input array for DtData should be 2D with the shape [npt,3]"

        # masked_* / unmasked_* are attributes defined elsewhere in the class
        # (presumably properties built on _flux, _inputs and _mask).
        # medsig is presumably a (median, sigma) estimator -- defined elsewhere.
        self._fm, self._fs = medsig(self.masked_flux)
        self.nptm = self.masked_flux.size
        self.nptu = self.unmasked_flux.size

        # Short aliases holding the values read at construction time.
        self.mf, self.uf = self.masked_flux, self.unmasked_flux
        self.mi, self.ui = self.masked_inputs, self.unmasked_inputs
        self.mt, self.ut = self.masked_time, self.unmasked_time
        self.mx, self.ux = self.masked_x, self.unmasked_x
        self.my, self.uy = self.masked_y, self.unmasked_y
コード例 #4
0
    def __init__(self, flux, inputs, mask=None):
        """Store the flux, inputs and mask and cache the derived views.

        Parameters
        ----------
        flux : array-like
            Flux values, 1D with ``npt`` elements.
        inputs : array-like
            Input array, 2D with three columns (``[npt, 3]``).
        mask : array-like, optional
            Point mask.  When omitted an all-True boolean mask with one entry
            per flux value is used.

        Raises
        ------
        AssertionError
            If ``flux`` is not 1D or ``inputs`` is not 2D with three columns.
        """
        self._flux = array(flux)
        self._inputs = array(inputs)
        # The builtin ``bool`` replaces the ``np.bool`` alias, which no longer
        # exists in current NumPy releases.
        self._mask = array(mask) if mask is not None else ones(
            self._flux.size, bool)

        # Validate the shapes before any derived quantity is computed, so that
        # a malformed input is reported with the intended message.
        assert self._flux.ndim == 1, 'The flux array for DtData should be 1D [npt]'
        assert self._inputs.ndim == 2, 'The input array for DtData should be 2D [npt,3]'
        assert self._inputs.shape[
            1] == 3, 'The input array for DtData should be 2D with the shape [npt,3]'

        # masked_* / unmasked_* are attributes defined elsewhere in the class
        # (presumably properties built on _flux, _inputs and _mask).
        # medsig is presumably a (median, sigma) estimator -- defined elsewhere.
        self._fm, self._fs = medsig(self.masked_flux)
        self.nptm = self.masked_flux.size
        self.nptu = self.unmasked_flux.size

        # Short aliases holding the values read at construction time.
        self.mf, self.uf = self.masked_flux, self.unmasked_flux
        self.mi, self.ui = self.masked_inputs, self.unmasked_inputs
        self.mt, self.ut = self.masked_time, self.unmasked_time
        self.mx, self.ux = self.masked_x, self.unmasked_x
        self.my, self.uy = self.masked_y, self.unmasked_y
コード例 #5
0
def rebin_err(t, f, ferr=None, dt=0.02, ferr_type='medsig', ferr_style='std'):
    """
    Rebin a series into bins of width ``dt`` and estimate a per-bin error.

    @written by Ed Gillen
    The standard rebin function but also dealing with errors
    on the individual data points being binned.

    Bin starts run from ``t.min()`` in steps of ``dt`` and stay below
    ``t.max()``; bin ``i`` collects the points with
    ``start_i <= t < start_i + dt``.  The returned time of a bin is the mean
    time of the points in it.  Bins whose binned value is not finite
    (including bins without any point) are dropped from the output.

    Parameters
    ----------
    t : array
        Time stamps.
    f : array
        Values to be binned, same length as ``t``.
    ferr : array or None
        Per-point errors.  If None, the error of a bin is derived from the
        scatter of the points in the bin (see ``ferr_type``/``ferr_style``).
        Otherwise the bin value and error come from
        ``weighted_avg_and_std`` with weights ``1 / ferr``.
    dt : float
        Bin width, in the units of ``t``.
    ferr_type : str
        Only used when ``ferr`` is None:
        'medsig'  -> value and error from ``medsig``
        anything else ('meanstd') -> ``np.nanmean`` and ``np.nanstd``
    ferr_style : str
        Only used when ``ferr`` is None:
        'std'
        'sem' = std / sqrt(N)

    Returns
    -------
    (treg, freg, freg_err) : binned times, binned values and binned errors
        (``freg_err`` is a numpy masked array), restricted to the bins with
        a finite binned value.
    """
    treg = np.r_[t.min():t.max():dt]
    nreg = len(treg)
    freg = np.zeros(nreg) + np.nan
    freg_err = np.ma.zeros(nreg) + np.nan
    for i in range(nreg):
        in_bin = (t >= treg[i]) * (t < treg[i] + dt)
        if not in_bin.any():
            continue

        # Replace the bin start by the mean time of the points in the bin.
        treg[i] = np.ma.mean(t[in_bin])

        # `is None` instead of `== None`: for an array `ferr` the equality
        # is evaluated elementwise and cannot be used as an `if` condition.
        if ferr is None:
            if ferr_type == 'medsig':
                freg[i], freg_err[i] = medsig(f[in_bin])
            else:
                freg[i] = np.nanmean(f[in_bin])
                freg_err[i] = np.nanstd(f[in_bin])

            if ferr_style == 'sem':
                freg_err[i] /= np.sqrt(len(f[in_bin]))
        else:
            freg[i], freg_err[i] = weighted_avg_and_std(
                f[in_bin], np.ma.array([1 / float(x) for x in ferr[in_bin]]))

    finite = np.isfinite(freg)
    return treg[finite], freg[finite], freg_err[finite]
コード例 #6
0
ファイル: k2sc.py プロジェクト: petigura/k2sc
def detrend(dataset, args):
    """
    Detrend every flux set of ``dataset`` and return the per-set results.

    Needs to have args defined.

    Steps, in order:
      1. optional masking of a periodic signal (``args.p_mask_*``),
      2. for each flux set: an initial sigma-clipping and a Lomb-Scargle
         period search, based on a prediction made with the default
         hyperparameter vector of a ``BasicKernelEP`` kernel,
      3. kernel selection,
      4. for each flux set: global (DE) and local hyperparameter
         optimisation, trend computation, final outlier flagging and CDPP
         computation.

    Parameters
    ----------
    dataset : object providing ``epic``, ``nsets``, ``fluxes``, ``time``,
        ``x``, ``y``, ``mflags`` and ``mask_periodic_signal``.
    args : namespace with the options read below (``p_mask_*``, ``splits``,
        ``tr_*``, ``ls_*``, ``kernel*``, ``default_position_kernel``,
        ``seed``, ``de_*``).

    Returns
    -------
    ``(dataset, results)`` where ``results`` holds one ``SCResult`` tuple per
    flux set, or ``None`` if the local optimiser raises ``ValueError`` for
    any flux set.

    Side effects
    ------------
    Changes the numpy error handling, reseeds ``np.random`` with
    ``args.seed`` for every flux set, overwrites ``args.kernel``, updates
    ``dataset.mflags`` and may set ``dataset.is_periodic``, ``ls_fap`` and
    ``ls_period``.  Calls ``exit(1)`` if a periodic kernel is requested
    without ``args.kernel_period``.
    """

    ## Setup the logger
    ## ----------------
    logger  = logging.getLogger('Worker %i' % mpi_rank)
    logger.name = '<{:d}>'.format(dataset.epic)

    np.seterrcall(lambda e,f: logger.info(e))
    np.seterr(invalid='ignore')

    ## Main variables
    ## --------------
    Result  = namedtuple('SCResult', 'detrender pv tr_time tr_position cdpp_r cdpp_t cdpp_c warn')
    results = []  # a list of Result tuples, one per aperture
    masks   = []  # a list of light curve masks, one per aperture 

    ## Initialise utility variables
    ## ----------------------------
    ds   = dataset
    info = logger.info

    ## Periodic signal masking
    ## -----------------------
    if args.p_mask_center and args.p_mask_period and args.p_mask_duration:
        ds.mask_periodic_signal(
            args.p_mask_center, args.p_mask_period, args.p_mask_duration
            )

    ## Initial outlier and period detection
    ## ------------------------------------
    ## We carry out an initial outlier and period detection using
    ## a default GP hyperparameter vector based on campaign 4 fits
    ## done using (almost) noninformative priors.

    for iset in range(ds.nsets):
        flux = ds.fluxes[iset]
        inputs = np.transpose([ds.time,ds.x,ds.y])
        detrender = Detrender(
            flux, inputs, mask=isfinite(flux), splits=args.splits, 
            kernel=BasicKernelEP(), tr_nrandom=args.tr_nrandom,
            tr_nblocks=args.tr_nblocks, tr_bspan=args.tr_bspan
            )
    
        ttrend,ptrend = detrender.predict(
            detrender.kernel.pv0+1e-5, components=True
            )

        # Remove both trend components (keeping their median level) and
        # normalise to a median of one, so the clipping limits are 1 +- 5 sigma.
        cflux = flux - ptrend + median(ptrend) - ttrend + median(ttrend)
        cflux /= nanmedian(cflux)

        ## Iterative sigma-clipping
        ## ------------------------
        info('Starting initial outlier detection')
        fmask  = isfinite(cflux)
        omask  = fmask.copy()
        i, nm  = 0, None
        # Iterate until the number of retained points stops changing
        # (at most 10 rounds).
        while nm != omask.sum() and i<10:
            nm = omask.sum()
            _, sigma = medsig(cflux[omask])
            omask[fmask] &= (cflux[fmask] < 1+5*sigma) & (cflux[fmask] > 1-5*sigma)
            i += 1
        masks.append(fmask)
        noutliers = (~omask).sum()
        # float() keeps this a true division whatever division mode is in effect.
        ofrac = noutliers / float(omask.size)
        # The outliers are only masked when they make up less than a quarter
        # of the points; ofrac is a fraction, so scale it for the %% output.
        if ofrac < 0.25:
            masks[-1] &= omask
            info('  Flagged %i (%4.1f%%) outliers.', noutliers, 100*ofrac)
        else:
            info('  Found %i (%4.1f%%) outliers. Not flagging..', noutliers, 100*ofrac)

        ## Lomb-Scargle period search
        ## --------------------------
        info('Starting Lomb-Scargle period search')
        mask  = masks[-1]
        nflux = flux - ptrend + nanmedian(ptrend)
        ntime = ds.time - ds.time.mean()
        # Subtract a 9th-order polynomial fitted to the unmasked points
        # before the period search.
        pflux = np.poly1d(np.polyfit(ntime[mask], nflux[mask], 9))(ntime)
        period, fap = psearch(ds.time[mask], (nflux-pflux)[mask], args.ls_min_period, args.ls_max_period)
        
        if fap < 1e-50:
            ds.is_periodic = True
            ds.ls_fap    = fap
            ds.ls_period = period
        
    ## Kernel selection
    ## ----------------
    # NOTE(review): args.kernel is unconditionally overwritten here, so the
    # test below is always true and the automatic kernel selection in the
    # else branch is never reached -- confirm this override is intended.
    args.kernel='basic'
    if args.kernel:
        info('Overriding automatic kernel selection, using %s kernel as given in the command line', args.kernel)
        if 'periodic' in args.kernel and not args.kernel_period:
            logger.critical('Need to give period (--kernel-period) if overriding automatic kernel detection with a periodic kernel. Quitting.')
            exit(1)
        kernel = kernels[args.kernel](period=args.kernel_period)
    else:
        info('  Using %s position kernel', args.default_position_kernel)
        # NOTE(review): ds.is_periodic is only assigned above when a period
        # is found; presumably the dataset provides a default -- confirm.
        if ds.is_periodic:
            info('  Found periodicity p = {:7.2f} (fap {:7.4e} < 1e-50), will use a quasiperiodic kernel'.format(ds.ls_period, ds.ls_fap))
        else:
            info('  No strong periodicity found, using a basic kernel')

        if args.default_position_kernel.lower() == 'sqrexp':
            kernel = QuasiPeriodicKernel(period=ds.ls_period)   if ds.is_periodic else BasicKernel() 
        else:
            kernel = QuasiPeriodicKernelEP(period=ds.ls_period) if ds.is_periodic else BasicKernelEP()


    ## Detrending
    ## ----------
    for iset in range(ds.nsets):
        if ds.nsets > 1:
            logger.name = 'Worker {:d} <{:d}-{:d}>'.format(mpi_rank, dataset.epic, iset+1)
        np.random.seed(args.seed)
        tstart = time()
        inputs = np.transpose([ds.time,ds.x,ds.y])
        detrender = Detrender(ds.fluxes[iset], inputs, mask=masks[iset],
                              splits=args.splits, kernel=kernel, tr_nrandom=args.tr_nrandom,
                              tr_nblocks=args.tr_nblocks, tr_bspan=args.tr_bspan)

        de = DiffEvol(detrender.neglnposterior, kernel.bounds, args.de_npop)

        ## Period population generation
        ## ----------------------------
        if isinstance(kernel, QuasiPeriodicKernel):
            de._population[:,2] = np.clip(normal(kernel.period, 0.1*kernel.period, size=de.n_pop),
                                          args.ls_min_period, args.ls_max_period)

        ## Global hyperparameter optimisation
        ## ----------------------------------
        info('Starting global hyperparameter optimisation using DE')
        tstart_de = time()
        tcur_de = tstart_de  # keeps the summary below valid if DE yields no iteration
        for i,_ in enumerate(de(args.de_niter)):
            info('  DE iteration %3i -ln(L) %4.1f', i, de.minimum_value)
            tcur_de = time()
            # Stop early (after at least three iterations) once the fitness
            # spread is below 3 or the time budget is exhausted.
            # np.ptp() is used because the ndarray.ptp() method is not
            # available in every NumPy version.
            if ((np.ptp(de._fitness) < 3) or (tcur_de - tstart_de > args.de_max_time)) and (i>2):
                break
        info('  DE finished in %i seconds', tcur_de-tstart_de)
        info('  DE minimum found at: %s', np.array_str(de.minimum_location, precision=3, max_line_width=250))
        info('  DE -ln(L) %4.1f', de.minimum_value)

        ## Local hyperparameter optimisation
        ## ---------------------------------
        info('Starting local hyperparameter optimisation')
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore', category=RuntimeWarning, append=True)
                pv, warn = detrender.train(de.minimum_location)
        except ValueError as e:
            logger.error('Local optimiser failed, %s', e)
            logger.error('Skipping the file')
            # The whole dataset is abandoned: the caller receives None.
            return
        info('  Local minimum found at: %s', np.array_str(pv, precision=3))

        ## Trend computation
        ## -----------------
        # Split each predicted component into its median and the
        # median-subtracted trend.
        (mt,tt),(mp,tp) = map(lambda a: (nanmedian(a), a-nanmedian(a)), detrender.predict(pv, components=True))

        ## Iterative sigma-clipping
        ## ------------------------
        info('Starting final outlier detection')
        flux = detrender.data.unmasked_flux
        cflux = flux-tp-tt
        cflux /= nanmedian(cflux)

        fmask = isfinite(cflux)
        mhigh = zeros_like(fmask)
        mlow  = zeros_like(fmask)
        mask  = fmask.copy()
        i, nm = 0, None
        # Same scheme as the initial clipping, but high and low outliers are
        # tracked separately so they can be flagged with different bits.
        while nm != mask.sum() and i<10:
            nm = mask.sum()
            _, sigma = medsig(cflux[mask])
            mhigh[fmask] = cflux[fmask] > 1+5*sigma
            mlow[fmask]  = cflux[fmask] < 1-5*sigma
            mask &= fmask & (~mlow) & (~mhigh)
            i += 1
        ds.mflags[iset][~fmask] |= M_NOTFINITE
        ds.mflags[iset][mhigh]  |= M_OUTLIER_U
        ds.mflags[iset][mlow]   |= M_OUTLIER_D
        
        info('  %5i too high', mhigh.sum())
        info('  %5i too low',  mlow.sum())
        info('  %5i not finite', (~fmask).sum())

        ## Detrending and CDPP computation
        ## -------------------------------
        info('Computing time and position trends')
        dd = detrender.data
        cdpp_r = cdpp(dd.masked_time,   dd.masked_flux)
        cdpp_t = cdpp(dd.unmasked_time, dd.unmasked_flux-tp,    exclude=~dd.mask)
        cdpp_c = cdpp(dd.unmasked_time, dd.unmasked_flux-tp-tt, exclude=~dd.mask)
        results.append(Result(detrender, pv, tt+mt, tp+mp, cdpp_r, cdpp_t, cdpp_c, warn))
        info('  CDPP - raw - %6.3f', cdpp_r)
        info('  CDPP - position component removed - %6.3f', cdpp_t)
        info('  CDPP - full reduction - %6.3f', cdpp_c)
        info('Detrending time %6.3f', time()-tstart)

    info('Finished')
    return dataset, results
コード例 #7
0
def detrend(dataset, args):
    """
    Detrend every flux set of ``dataset`` and return the per-set results.

    Needs to have args defined.

    Steps, in order:
      1. optional masking of a periodic signal (``args.p_mask_*``),
      2. for each flux set: an initial sigma-clipping and a Lomb-Scargle
         period search, based on a prediction made with the default
         hyperparameter vector of a ``BasicKernelEP`` kernel,
      3. kernel selection,
      4. for each flux set: global (DE) and local hyperparameter
         optimisation, trend computation, final outlier flagging and CDPP
         computation.

    Parameters
    ----------
    dataset : object providing ``epic``, ``nsets``, ``fluxes``, ``time``,
        ``x``, ``y``, ``mflags`` and ``mask_periodic_signal``.
    args : namespace with the options read below (``p_mask_*``, ``splits``,
        ``tr_*``, ``ls_*``, ``kernel*``, ``default_position_kernel``,
        ``seed``, ``de_*``).

    Returns
    -------
    ``(dataset, results)`` where ``results`` holds one ``SCResult`` tuple per
    flux set, or ``None`` if the local optimiser raises ``ValueError`` for
    any flux set.

    Side effects
    ------------
    Changes the numpy error handling, reseeds ``np.random`` with
    ``args.seed`` for every flux set, overwrites ``args.kernel``, updates
    ``dataset.mflags`` and may set ``dataset.is_periodic``, ``ls_fap`` and
    ``ls_period``.  Calls ``exit(1)`` if a periodic kernel is requested
    without ``args.kernel_period``.
    """

    ## Setup the logger
    ## ----------------
    logger = logging.getLogger('Worker %i' % mpi_rank)
    logger.name = '<{:d}>'.format(dataset.epic)

    np.seterrcall(lambda e, f: logger.info(e))
    np.seterr(invalid='ignore')

    ## Main variables
    ## --------------
    Result = namedtuple(
        'SCResult',
        'detrender pv tr_time tr_position cdpp_r cdpp_t cdpp_c warn')
    results = []  # a list of Result tuples, one per aperture
    masks = []  # a list of light curve masks, one per aperture

    ## Initialise utility variables
    ## ----------------------------
    ds = dataset
    info = logger.info

    ## Periodic signal masking
    ## -----------------------
    if args.p_mask_center and args.p_mask_period and args.p_mask_duration:
        ds.mask_periodic_signal(args.p_mask_center, args.p_mask_period,
                                args.p_mask_duration)

    ## Initial outlier and period detection
    ## ------------------------------------
    ## We carry out an initial outlier and period detection using
    ## a default GP hyperparameter vector based on campaign 4 fits
    ## done using (almost) noninformative priors.

    for iset in range(ds.nsets):
        flux = ds.fluxes[iset]
        inputs = np.transpose([ds.time, ds.x, ds.y])
        detrender = Detrender(flux,
                              inputs,
                              mask=isfinite(flux),
                              splits=args.splits,
                              kernel=BasicKernelEP(),
                              tr_nrandom=args.tr_nrandom,
                              tr_nblocks=args.tr_nblocks,
                              tr_bspan=args.tr_bspan)

        ttrend, ptrend = detrender.predict(detrender.kernel.pv0 + 1e-5,
                                           components=True)

        # Remove both trend components (keeping their median level) and
        # normalise to a median of one, so the clipping limits are 1 +- 5 sigma.
        cflux = flux - ptrend + median(ptrend) - ttrend + median(ttrend)
        cflux /= nanmedian(cflux)

        ## Iterative sigma-clipping
        ## ------------------------
        info('Starting initial outlier detection')
        fmask = isfinite(cflux)
        omask = fmask.copy()
        i, nm = 0, None
        # Iterate until the number of retained points stops changing
        # (at most 10 rounds).
        while nm != omask.sum() and i < 10:
            nm = omask.sum()
            _, sigma = medsig(cflux[omask])
            omask[fmask] &= (cflux[fmask] < 1 + 5 * sigma) & (cflux[fmask] >
                                                              1 - 5 * sigma)
            i += 1
        masks.append(fmask)
        noutliers = (~omask).sum()
        # float() keeps this a true division whatever division mode is in effect.
        ofrac = noutliers / float(omask.size)
        # The outliers are only masked when they make up less than a quarter
        # of the points; ofrac is a fraction, so scale it for the %% output.
        if ofrac < 0.25:
            masks[-1] &= omask
            info('  Flagged %i (%4.1f%%) outliers.', noutliers, 100 * ofrac)
        else:
            info('  Found %i (%4.1f%%) outliers. Not flagging..',
                 noutliers, 100 * ofrac)

        ## Lomb-Scargle period search
        ## --------------------------
        info('Starting Lomb-Scargle period search')
        mask = masks[-1]
        nflux = flux - ptrend + nanmedian(ptrend)
        ntime = ds.time - ds.time.mean()
        # Subtract a 9th-order polynomial fitted to the unmasked points
        # before the period search.
        pflux = np.poly1d(np.polyfit(ntime[mask], nflux[mask], 9))(ntime)
        period, fap = psearch(ds.time[mask], (nflux - pflux)[mask],
                              args.ls_min_period, args.ls_max_period)

        if fap < 1e-50:
            ds.is_periodic = True
            ds.ls_fap = fap
            ds.ls_period = period

    ## Kernel selection
    ## ----------------
    # NOTE(review): args.kernel is unconditionally overwritten here, so the
    # test below is always true and the automatic kernel selection in the
    # else branch is never reached -- confirm this override is intended.
    args.kernel = 'basic'
    if args.kernel:
        info(
            'Overriding automatic kernel selection, using %s kernel as given in the command line',
            args.kernel)
        if 'periodic' in args.kernel and not args.kernel_period:
            logger.critical(
                'Need to give period (--kernel-period) if overriding automatic kernel detection with a periodic kernel. Quitting.'
            )
            exit(1)
        kernel = kernels[args.kernel](period=args.kernel_period)
    else:
        info('  Using %s position kernel', args.default_position_kernel)
        # NOTE(review): ds.is_periodic is only assigned above when a period
        # is found; presumably the dataset provides a default -- confirm.
        if ds.is_periodic:
            info(
                '  Found periodicity p = {:7.2f} (fap {:7.4e} < 1e-50), will use a quasiperiodic kernel'
                .format(ds.ls_period, ds.ls_fap))
        else:
            info('  No strong periodicity found, using a basic kernel')

        if args.default_position_kernel.lower() == 'sqrexp':
            kernel = QuasiPeriodicKernel(
                period=ds.ls_period) if ds.is_periodic else BasicKernel()
        else:
            kernel = QuasiPeriodicKernelEP(
                period=ds.ls_period) if ds.is_periodic else BasicKernelEP()

    ## Detrending
    ## ----------
    for iset in range(ds.nsets):
        if ds.nsets > 1:
            logger.name = 'Worker {:d} <{:d}-{:d}>'.format(
                mpi_rank, dataset.epic, iset + 1)
        np.random.seed(args.seed)
        tstart = time()
        inputs = np.transpose([ds.time, ds.x, ds.y])
        detrender = Detrender(ds.fluxes[iset],
                              inputs,
                              mask=masks[iset],
                              splits=args.splits,
                              kernel=kernel,
                              tr_nrandom=args.tr_nrandom,
                              tr_nblocks=args.tr_nblocks,
                              tr_bspan=args.tr_bspan)

        de = DiffEvol(detrender.neglnposterior, kernel.bounds, args.de_npop)

        ## Period population generation
        ## ----------------------------
        if isinstance(kernel, QuasiPeriodicKernel):
            de._population[:, 2] = np.clip(
                normal(kernel.period, 0.1 * kernel.period, size=de.n_pop),
                args.ls_min_period, args.ls_max_period)

        ## Global hyperparameter optimisation
        ## ----------------------------------
        info('Starting global hyperparameter optimisation using DE')
        tstart_de = time()
        tcur_de = tstart_de  # keeps the summary below valid if DE yields no iteration
        for i, _ in enumerate(de(args.de_niter)):
            info('  DE iteration %3i -ln(L) %4.1f', i, de.minimum_value)
            tcur_de = time()
            # Stop early (after at least three iterations) once the fitness
            # spread is below 3 or the time budget is exhausted.
            # np.ptp() is used because the ndarray.ptp() method is not
            # available in every NumPy version.
            if ((np.ptp(de._fitness) < 3) or
                (tcur_de - tstart_de > args.de_max_time)) and (i > 2):
                break
        info('  DE finished in %i seconds', tcur_de - tstart_de)
        info(
            '  DE minimum found at: %s',
            np.array_str(de.minimum_location, precision=3, max_line_width=250))
        info('  DE -ln(L) %4.1f', de.minimum_value)

        ## Local hyperparameter optimisation
        ## ---------------------------------
        info('Starting local hyperparameter optimisation')
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore',
                                        category=RuntimeWarning,
                                        append=True)
                pv, warn = detrender.train(de.minimum_location)
        except ValueError as e:
            logger.error('Local optimiser failed, %s', e)
            logger.error('Skipping the file')
            # The whole dataset is abandoned: the caller receives None.
            return
        info('  Local minimum found at: %s', np.array_str(pv, precision=3))

        ## Trend computation
        ## -----------------
        # Split each predicted component into its median and the
        # median-subtracted trend.
        (mt, tt), (mp, tp) = map(lambda a: (nanmedian(a), a - nanmedian(a)),
                                 detrender.predict(pv, components=True))

        ## Iterative sigma-clipping
        ## ------------------------
        info('Starting final outlier detection')
        flux = detrender.data.unmasked_flux
        cflux = flux - tp - tt
        cflux /= nanmedian(cflux)

        fmask = isfinite(cflux)
        mhigh = zeros_like(fmask)
        mlow = zeros_like(fmask)
        mask = fmask.copy()
        i, nm = 0, None
        # Same scheme as the initial clipping, but high and low outliers are
        # tracked separately so they can be flagged with different bits.
        while nm != mask.sum() and i < 10:
            nm = mask.sum()
            _, sigma = medsig(cflux[mask])
            mhigh[fmask] = cflux[fmask] > 1 + 5 * sigma
            mlow[fmask] = cflux[fmask] < 1 - 5 * sigma
            mask &= fmask & (~mlow) & (~mhigh)
            i += 1
        ds.mflags[iset][~fmask] |= M_NOTFINITE
        ds.mflags[iset][mhigh] |= M_OUTLIER_U
        ds.mflags[iset][mlow] |= M_OUTLIER_D

        info('  %5i too high', mhigh.sum())
        info('  %5i too low', mlow.sum())
        info('  %5i not finite', (~fmask).sum())

        ## Detrending and CDPP computation
        ## -------------------------------
        info('Computing time and position trends')
        dd = detrender.data
        cdpp_r = cdpp(dd.masked_time, dd.masked_flux)
        cdpp_t = cdpp(dd.unmasked_time,
                      dd.unmasked_flux - tp,
                      exclude=~dd.mask)
        cdpp_c = cdpp(dd.unmasked_time,
                      dd.unmasked_flux - tp - tt,
                      exclude=~dd.mask)
        results.append(
            Result(detrender, pv, tt + mt, tp + mp, cdpp_r, cdpp_t, cdpp_c,
                   warn))
        info('  CDPP - raw - %6.3f', cdpp_r)
        info('  CDPP - position component removed - %6.3f', cdpp_t)
        info('  CDPP - full reduction - %6.3f', cdpp_c)
        info('Detrending time %6.3f', time() - tstart)

    info('Finished')
    return dataset, results
コード例 #8
0
def rebin_err(t,
              f,
              ferr=None,
              dt=0.02,
              phasefolded=False,
              ferr_type='medsig',
              ferr_style='std',
              sigmaclip=False):
    """
    Rebin a series into bins of width ``dt`` and estimate a per-bin error.

    @written by Ed Gillen, extended by Maximilian N. Guenther
    The standard rebin function but also dealing with errors
    on the individual data points being binned.

    Bin ``i`` collects the points with ``start_i <= t < start_i + dt``; the
    returned time of a non-empty bin is the mean time of its points.

    Parameters
    ----------
    t : array
        Time stamps (or phases when ``phasefolded`` is True).
    f : array
        Values to be binned, same length as ``t``.  The caller's array is
        not modified.
    ferr : array or None
        Per-point errors.  If None, the error of a bin is derived from the
        scatter of the points in the bin (see ``ferr_type``/``ferr_style``).
        Otherwise the bin value and error come from
        ``weighted_avg_and_std`` with weights ``1 / ferr``.
    dt : float
        Bin width, in the units of ``t``.
    phasefolded : bool
        False: bin starts run from ``t.min()`` in steps of ``dt`` and stay
        below ``t.max()``; only bins with a finite value are returned.
        Otherwise: bin starts run from -0.25 in steps of ``dt`` and stay
        below 0.75; all bins are returned, empty ones holding NaN.
    ferr_type : str
        Only used when ``ferr`` is None:
        'medsig'  -> value and error from ``medsig``
        anything else ('meanstd') -> ``np.nanmean`` and ``np.nanstd``
    ferr_style : str
        Only used when ``ferr`` is None:
        'std'
        'sem' = std / sqrt(N)
    sigmaclip : bool
        If True, ``f`` is first passed through
        ``sigma_clip(f, sigma=5, iters=3)``; a failure of that call is
        ignored and the unclipped data are used.

    Returns
    -------
    (treg, freg, freg_err, N) : binned times, binned values, binned errors
        and the number of points per bin.
    """
    #::: sigma clip
    if sigmaclip is True:
        try:
            f = sigma_clip(f, sigma=5, iters=3)
        except Exception:
            # Deliberately best-effort: fall back to the unclipped data.
            # NOTE(review): this also hides a call that fails every time
            # (e.g. an unsupported keyword) -- confirm against the
            # sigma_clip implementation in use.
            pass

    #::: make masked values to NaNs if applicable
    if hasattr(f, 'mask'):
        try:
            # Work on a copy so that the caller's array keeps its mask and
            # its data; the copy is only adopted if the conversion succeeds.
            f_nan = f.copy()
            f_nan[f_nan.mask] = np.nan
        except Exception:
            # e.g. a dtype that cannot hold NaN: keep the data as they are.
            pass
        else:
            f = f_nan

    #::: bin
    #::: detect if it's phase-folded data or not
    if phasefolded is False:
        treg = np.r_[t.min():t.max():dt]
    else:
        treg = np.r_[-0.25:0.75:dt]
    nreg = len(treg)
    freg = np.zeros(nreg) + np.nan
    freg_err = np.zeros(nreg) + np.nan
    N = np.zeros(nreg)
    for i in range(nreg):
        in_bin = (t >= treg[i]) * (t < treg[i] + dt)
        if not in_bin.any():
            continue

        # Replace the bin start by the mean time of the points in the bin.
        treg[i] = np.nanmean(t[in_bin])
        N[i] = len(t[in_bin])

        # `is None` instead of `== None`: for an array `ferr` the equality
        # is evaluated elementwise and cannot be used as an `if` condition.
        if ferr is None:
            if ferr_type == 'medsig':
                freg[i], freg_err[i] = medsig(f[in_bin])
            else:
                try:
                    freg[i] = np.nanmean(f[in_bin])
                    freg_err[i] = np.nanstd(f[in_bin])
                except Exception:  #e.g. in case of an empty or completely masked array
                    freg[i] = np.nan
                    freg_err[i] = np.nan

            if ferr_style == 'sem':
                freg_err[i] /= np.sqrt(len(f[in_bin]))
        else:
            freg[i], freg_err[i] = weighted_avg_and_std(
                f[in_bin], np.ma.array([1 / float(x) for x in ferr[in_bin]]))

    if phasefolded is False:
        k = np.isfinite(freg)  #only return finite bins
    else:
        k = slice(None)  #return the entire phase, filled with NaN replacements

    return treg[k], freg[k], freg_err[k], N[k]