def mask_outliers(self, max_sigma=5, pv=None):
    """Flag points whose residual from the model exceeds max_sigma.

    Predicts the flux with the hyperparameter vector ``pv`` (the kernel's
    default vector when None), measures the residual scatter over the
    quality-masked points with ``medsig``, builds a keep-mask that is True
    for points within ``max_sigma`` robust standard deviations of the
    residual median, installs it via ``self.data.set_mask``, and returns
    the number of points kept.
    """
    if pv is None:
        pv = self.kernel.pv0
    good = self.data.quality_mask
    resid = self.data.unmasked_normalised_flux - self.predict(pv)
    med, std = medsig(resid[good])
    # Start from an all-True mask and clip only the quality-masked points.
    keep = ones_like(self.data.mask)
    keep[good] = abs(resid[good] - med) < max_sigma * std
    self.data.set_mask(keep)
    return keep.sum()
def __init__(self, flux, inputs, mask=None):
    """Store a 1D flux array with its [npt,3] input array and masks.

    Parameters
    ----------
    flux : array-like
        1D flux array [npt].
    inputs : array-like
        2D input array with shape [npt,3] (time, x, y).
    mask : array-like, optional
        1D quality mask [npt]; defaults to all-True.
    """
    self.flux = array(flux)
    self.inputs = array(inputs)
    # Validate the inputs before any derived computation so that bad input
    # fails fast with a clear message (previously the asserts ran last).
    assert self.flux.ndim == 1, 'The flux array for DtData should be 1D [npt]'
    assert self.inputs.ndim == 2, 'The input array for DtData should be 2D [npt,3]'
    assert self.inputs.shape[1] == 3, 'The input array for DtData should be 2D with the shape [npt,3]'
    # np.bool (an alias of the builtin bool) was removed in NumPy 1.24;
    # using the builtin bool dtype is equivalent and forward-compatible.
    self.quality_mask = array(mask) if mask is not None else ones(self.flux.size, bool)
    assert self.quality_mask.ndim == 1, 'The mask array for DtData should be 1D [npt]'
    self.outlier_mask = ones(self.flux.size, bool)
    self.mask = self.quality_mask & self.outlier_mask
    self.npt = self.flux.size
    # Robust median and scatter of the masked flux.
    self._fm, self._fs = medsig(self.flux[self.mask])
def __init__(self, flux, inputs, mask=None):
    """Store a 1D flux array with its [npt,3] input array and a mask.

    Parameters
    ----------
    flux : array-like
        1D flux array [npt].
    inputs : array-like
        2D input array with shape [npt,3] (time, x, y).
    mask : array-like, optional
        1D mask [npt]; defaults to all-True.
    """
    self._flux = array(flux)
    self._inputs = array(inputs)
    # Validate before any derived computation so bad input fails fast with
    # a clear message (previously the asserts ran after first use).
    assert self._flux.ndim == 1, "The flux array for DtData should be 1D [npt]"
    assert self._inputs.ndim == 2, "The input array for DtData should be 2D [npt,3]"
    assert self._inputs.shape[1] == 3, "The input array for DtData should be 2D with the shape [npt,3]"
    # np.bool (an alias of the builtin bool) was removed in NumPy 1.24;
    # using the builtin bool dtype is equivalent and forward-compatible.
    self._mask = array(mask) if mask is not None else ones(self._flux.size, bool)
    # Robust median and scatter of the masked flux.
    self._fm, self._fs = medsig(self.masked_flux)
    self.nptm = self.masked_flux.size
    self.nptu = self.unmasked_flux.size
    # Short aliases for the masked (m*) and unmasked (u*) views.
    self.mf, self.uf = self.masked_flux, self.unmasked_flux
    self.mi, self.ui = self.masked_inputs, self.unmasked_inputs
    self.mt, self.ut = self.masked_time, self.unmasked_time
    self.mx, self.ux = self.masked_x, self.unmasked_x
    self.my, self.uy = self.masked_y, self.unmasked_y
def __init__(self, flux, inputs, mask=None):
    """Store a 1D flux array with its [npt,3] input array and a mask.

    Parameters
    ----------
    flux : array-like
        1D flux array [npt].
    inputs : array-like
        2D input array with shape [npt,3] (time, x, y).
    mask : array-like, optional
        1D mask [npt]; defaults to all-True.
    """
    self._flux = array(flux)
    self._inputs = array(inputs)
    # Validate before any derived computation so bad input fails fast with
    # a clear message (previously the asserts ran after first use).
    assert self._flux.ndim == 1, 'The flux array for DtData should be 1D [npt]'
    assert self._inputs.ndim == 2, 'The input array for DtData should be 2D [npt,3]'
    assert self._inputs.shape[1] == 3, 'The input array for DtData should be 2D with the shape [npt,3]'
    # np.bool (an alias of the builtin bool) was removed in NumPy 1.24;
    # using the builtin bool dtype is equivalent and forward-compatible.
    self._mask = array(mask) if mask is not None else ones(self._flux.size, bool)
    # Robust median and scatter of the masked flux.
    self._fm, self._fs = medsig(self.masked_flux)
    self.nptm = self.masked_flux.size
    self.nptu = self.unmasked_flux.size
    # Short aliases for the masked (m*) and unmasked (u*) views.
    self.mf, self.uf = self.masked_flux, self.unmasked_flux
    self.mi, self.ui = self.masked_inputs, self.unmasked_inputs
    self.mt, self.ut = self.masked_time, self.unmasked_time
    self.mx, self.ux = self.masked_x, self.unmasked_x
    self.my, self.uy = self.masked_y, self.unmasked_y
def rebin_err(t, f, ferr=None, dt=0.02, ferr_type='medsig', ferr_style='std'):
    """
    @written by Ed Gillen

    Rebin a time series into regular bins of width ``dt``, also dealing
    with errors on the individual data points being binned.

    Parameters
    ----------
    t, f : arrays
        Time and flux values.
    ferr : array, optional
        Per-point flux uncertainties; when given, each bin is the
        inverse-error weighted average computed by ``weighted_avg_and_std``.
    dt : float
        Bin width, in the units of ``t``.
    ferr_type : 'medsig' or 'meanstd'
        Per-bin statistic used when ``ferr`` is not given.
    ferr_style : 'std' or 'sem'
        'sem' = std / sqrt(N) (applies to the 'meanstd' branch).

    Returns
    -------
    treg, freg, freg_err : arrays
        Bin centres (mean time per bin), bin values, and bin errors,
        restricted to bins with a finite value.
    """
    treg = np.r_[t.min():t.max():dt]
    nreg = len(treg)
    freg = np.zeros(nreg) + np.nan
    freg_err = np.ma.zeros(nreg) + np.nan
    for i in np.arange(nreg):
        l = (t >= treg[i]) * (t < treg[i] + dt)
        if l.any():
            treg[i] = np.ma.mean(t[l])
            # BUG FIX: 'ferr == None' broadcasts elementwise when ferr is an
            # ndarray and then raises "truth value is ambiguous", making the
            # weighted branch unreachable; identity comparison is correct.
            if ferr is None:
                if ferr_type == 'medsig':
                    freg[i], freg_err[i] = medsig(f[l])
                else:
                    freg[i] = np.nanmean(f[l])
                    freg_err[i] = np.nanstd(f[l])
                    if ferr_style == 'sem':
                        freg_err[i] /= np.sqrt(len(f[l]))
            else:
                freg[i], freg_err[i] = weighted_avg_and_std(
                    f[l], np.ma.array([1 / float(x) for x in ferr[l]]))
    # Keep only the bins that actually received data.
    l = np.isfinite(freg)
    return treg[l], freg[l], freg_err[l]
def detrend(dataset, args):
    """Run the full detrending pipeline on one dataset.

    Needs to have args defined (splits, tr_* , de_*, ls_* , p_mask_*,
    kernel options, seed).

    Performs, per flux set: an initial outlier detection and Lomb-Scargle
    period search with a default GP, kernel selection, global (DE) and
    local hyperparameter optimisation, final sigma-clipping with quality
    flags, and CDPP computation.

    Returns (dataset, results) where results is a list of SCResult tuples,
    or None when the local optimiser fails.
    """
    ## Setup the logger
    ## ----------------
    logger = logging.getLogger('Worker %i' % mpi_rank)
    logger.name = '<{:d}>'.format(dataset.epic)
    # Route numpy floating-point error messages into the logger; invalid
    # operations (NaNs) are expected in the light curves and are silenced.
    np.seterrcall(lambda e,f: logger.info(e))
    np.seterr(invalid='ignore')

    ## Main variables
    ## --------------
    Result = namedtuple('SCResult', 'detrender pv tr_time tr_position cdpp_r cdpp_t cdpp_c warn')
    results = []  # a list of Result tuples, one per aperture
    masks = []    # a list of light curve masks, one per aperture

    ## Initialise utility variables
    ## ----------------------------
    ds = dataset
    info = logger.info

    ## Periodic signal masking
    ## -----------------------
    if args.p_mask_center and args.p_mask_period and args.p_mask_duration:
        ds.mask_periodic_signal(args.p_mask_center, args.p_mask_period, args.p_mask_duration)

    ## Initial outlier and period detection
    ## ------------------------------------
    ## We carry out an initial outlier and period detection using
    ## a default GP hyperparameter vector based on campaign 4 fits
    ## done using (almost) noninformative priors.
    for iset in range(ds.nsets):
        flux = ds.fluxes[iset]
        inputs = np.transpose([ds.time,ds.x,ds.y])
        detrender = Detrender(flux, inputs, mask=isfinite(flux), splits=args.splits,
                              kernel=BasicKernelEP(), tr_nrandom=args.tr_nrandom,
                              tr_nblocks=args.tr_nblocks, tr_bspan=args.tr_bspan)
        # Predict the time and position trends with a slightly perturbed
        # default hyperparameter vector and remove both, preserving the
        # median levels, to obtain a flattened light curve.
        ttrend,ptrend = detrender.predict(detrender.kernel.pv0+1e-5, components=True)
        cflux = flux - ptrend + median(ptrend) - ttrend + median(ttrend)
        cflux /= nanmedian(cflux)

        ## Iterative sigma-clipping
        ## ------------------------
        info('Starting initial outlier detection')
        fmask = isfinite(cflux)
        omask = fmask.copy()
        i, nm = 0, None
        # Iterate until the mask stops changing (or at most 10 rounds),
        # clipping points more than 5 sigma from unity.
        while nm != omask.sum() and i<10:
            nm = omask.sum()
            _, sigma = medsig(cflux[omask])
            omask[fmask] &= (cflux[fmask] < 1+5*sigma) & (cflux[fmask] > 1-5*sigma)
            i += 1
        masks.append(fmask)
        ofrac = (~omask).sum() / omask.size
        # Apply the outlier mask only when the flagged fraction looks sane.
        if ofrac < 0.25:
            masks[-1] &= omask
            info(' Flagged %i (%4.1f%%) outliers.', (~omask).sum(), ofrac)
        else:
            info(' Found %i (%4.1f%%) outliers. Not flagging..', (~omask).sum(), ofrac)

        ## Lomb-Scargle period search
        ## --------------------------
        info('Starting Lomb-Scargle period search')
        mask = masks[-1]
        nflux = flux - ptrend + nanmedian(ptrend)
        ntime = ds.time - ds.time.mean()
        # Remove a 9th-order polynomial baseline before the period search.
        pflux = np.poly1d(np.polyfit(ntime[mask], nflux[mask], 9))(ntime)
        period, fap = psearch(ds.time[mask], (nflux-pflux)[mask], args.ls_min_period, args.ls_max_period)
        if fap < 1e-50:
            ds.is_periodic = True
            ds.ls_fap = fap
            ds.ls_period = period

    ## Kernel selection
    ## ----------------
    # NOTE(review): this hard-coded override forces the basic kernel and
    # makes the automatic-selection branch below dead code -- looks like a
    # debugging leftover; confirm before removing.
    args.kernel='basic'
    if args.kernel:
        info('Overriding automatic kernel selection, using %s kernel as given in the command line', args.kernel)
        if 'periodic' in args.kernel and not args.kernel_period:
            logger.critical('Need to give period (--kernel-period) if overriding automatic kernel detection with a periodic kernel. Quitting.')
            exit(1)
        kernel = kernels[args.kernel](period=args.kernel_period)
    else:
        info(' Using %s position kernel', args.default_position_kernel)
        if ds.is_periodic:
            info(' Found periodicity p = {:7.2f} (fap {:7.4e} < 1e-50), will use a quasiperiodic kernel'.format(ds.ls_period, ds.ls_fap))
        else:
            info(' No strong periodicity found, using a basic kernel')
        if args.default_position_kernel.lower() == 'sqrexp':
            kernel = QuasiPeriodicKernel(period=ds.ls_period) if ds.is_periodic else BasicKernel()
        else:
            kernel = QuasiPeriodicKernelEP(period=ds.ls_period) if ds.is_periodic else BasicKernelEP()

    ## Detrending
    ## ----------
    for iset in range(ds.nsets):
        if ds.nsets > 1:
            logger.name = 'Worker {:d} <{:d}-{:d}>'.format(mpi_rank, dataset.epic, iset+1)
        np.random.seed(args.seed)
        tstart = time()
        inputs = np.transpose([ds.time,ds.x,ds.y])
        detrender = Detrender(ds.fluxes[iset], inputs, mask=masks[iset], splits=args.splits,
                              kernel=kernel, tr_nrandom=args.tr_nrandom,
                              tr_nblocks=args.tr_nblocks, tr_bspan=args.tr_bspan)
        de = DiffEvol(detrender.neglnposterior, kernel.bounds, args.de_npop)

        ## Period population generation
        ## ----------------------------
        # Seed the DE population's period parameter (column 2) around the
        # Lomb-Scargle estimate, clipped to the allowed search range.
        if isinstance(kernel, QuasiPeriodicKernel):
            de._population[:,2] = np.clip(normal(kernel.period, 0.1*kernel.period, size=de.n_pop),
                                          args.ls_min_period, args.ls_max_period)

        ## Global hyperparameter optimisation
        ## ----------------------------------
        info('Starting global hyperparameter optimisation using DE')
        tstart_de = time()
        for i,r in enumerate(de(args.de_niter)):
            info(' DE iteration %3i -ln(L) %4.1f', i, de.minimum_value)
            tcur_de = time()
            # Stop early once the population has converged (fitness spread
            # < 3) or the time budget is spent, after at least 3 iterations.
            if ((de._fitness.ptp() < 3) or (tcur_de - tstart_de > args.de_max_time)) and (i>2):
                break
        info(' DE finished in %i seconds', tcur_de-tstart_de)
        info(' DE minimum found at: %s', np.array_str(de.minimum_location, precision=3, max_line_width=250))
        info(' DE -ln(L) %4.1f', de.minimum_value)

        ## Local hyperparameter optimisation
        ## ---------------------------------
        info('Starting local hyperparameter optimisation')
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore', category=RuntimeWarning, append=True)
                pv, warn = detrender.train(de.minimum_location)
        except ValueError as e:
            logger.error('Local optimiser failed, %s', e)
            logger.error('Skipping the file')
            # NOTE(review): returns None here while the success path returns
            # (dataset, results) -- callers must handle both shapes.
            return
        info(' Local minimum found at: %s', np.array_str(pv, precision=3))

        ## Trend computation
        ## -----------------
        # Split each predicted trend into its median level (mt/mp) and the
        # zero-centred variation (tt/tp).
        (mt,tt),(mp,tp) = map(lambda a: (nanmedian(a), a-nanmedian(a)),
                              detrender.predict(pv, components=True))

        ## Iterative sigma-clipping
        ## ------------------------
        info('Starting final outlier detection')
        flux = detrender.data.unmasked_flux
        cflux = flux-tp-tt
        cflux /= nanmedian(cflux)
        fmask = isfinite(cflux)
        mhigh = zeros_like(fmask)
        mlow = zeros_like(fmask)
        mask = fmask.copy()
        i, nm = 0, None
        # Iterate until convergence, separating upward and downward outliers.
        while nm != mask.sum() and i<10:
            nm = mask.sum()
            _, sigma = medsig(cflux[mask])
            mhigh[fmask] = cflux[fmask] > 1+5*sigma
            mlow[fmask] = cflux[fmask] < 1-5*sigma
            mask &= fmask & (~mlow) & (~mhigh)
            i += 1
        # Record the per-point quality flags.
        ds.mflags[iset][~fmask] |= M_NOTFINITE
        ds.mflags[iset][mhigh] |= M_OUTLIER_U
        ds.mflags[iset][mlow] |= M_OUTLIER_D
        info(' %5i too high', mhigh.sum())
        info(' %5i too low', mlow.sum())
        info(' %5i not finite', (~fmask).sum())

        ## Detrending and CDPP computation
        ## -------------------------------
        # CDPP for the raw flux, the position-corrected flux, and the fully
        # corrected flux.
        info('Computing time and position trends')
        dd = detrender.data
        cdpp_r = cdpp(dd.masked_time, dd.masked_flux)
        cdpp_t = cdpp(dd.unmasked_time, dd.unmasked_flux-tp, exclude=~dd.mask)
        cdpp_c = cdpp(dd.unmasked_time, dd.unmasked_flux-tp-tt, exclude=~dd.mask)
        results.append(Result(detrender, pv, tt+mt, tp+mp, cdpp_r, cdpp_t, cdpp_c, warn))
        info(' CDPP - raw - %6.3f', cdpp_r)
        info(' CDPP - position component removed - %6.3f', cdpp_t)
        info(' CDPP - full reduction - %6.3f', cdpp_c)
        info('Detrending time %6.3f', time()-tstart)
    info('Finished')
    return dataset, results
def detrend(dataset, args):
    """Run the full detrending pipeline on one dataset.

    Needs to have args defined (splits, tr_* , de_*, ls_* , p_mask_*,
    kernel options, seed).

    Performs, per flux set: an initial outlier detection and Lomb-Scargle
    period search with a default GP, kernel selection, global (DE) and
    local hyperparameter optimisation, final sigma-clipping with quality
    flags, and CDPP computation.

    Returns (dataset, results) where results is a list of SCResult tuples,
    or None when the local optimiser fails.
    """
    ## Setup the logger
    ## ----------------
    logger = logging.getLogger('Worker %i' % mpi_rank)
    logger.name = '<{:d}>'.format(dataset.epic)
    # Route numpy floating-point error messages into the logger; invalid
    # operations (NaNs) are expected in the light curves and are silenced.
    np.seterrcall(lambda e, f: logger.info(e))
    np.seterr(invalid='ignore')

    ## Main variables
    ## --------------
    Result = namedtuple(
        'SCResult',
        'detrender pv tr_time tr_position cdpp_r cdpp_t cdpp_c warn')
    results = []  # a list of Result tuples, one per aperture
    masks = []  # a list of light curve masks, one per aperture

    ## Initialise utility variables
    ## ----------------------------
    ds = dataset
    info = logger.info

    ## Periodic signal masking
    ## -----------------------
    if args.p_mask_center and args.p_mask_period and args.p_mask_duration:
        ds.mask_periodic_signal(args.p_mask_center, args.p_mask_period,
                                args.p_mask_duration)

    ## Initial outlier and period detection
    ## ------------------------------------
    ## We carry out an initial outlier and period detection using
    ## a default GP hyperparameter vector based on campaign 4 fits
    ## done using (almost) noninformative priors.
    for iset in range(ds.nsets):
        flux = ds.fluxes[iset]
        inputs = np.transpose([ds.time, ds.x, ds.y])
        detrender = Detrender(flux,
                              inputs,
                              mask=isfinite(flux),
                              splits=args.splits,
                              kernel=BasicKernelEP(),
                              tr_nrandom=args.tr_nrandom,
                              tr_nblocks=args.tr_nblocks,
                              tr_bspan=args.tr_bspan)
        # Predict the time and position trends with a slightly perturbed
        # default hyperparameter vector and remove both, preserving the
        # median levels, to obtain a flattened light curve.
        ttrend, ptrend = detrender.predict(detrender.kernel.pv0 + 1e-5,
                                           components=True)
        cflux = flux - ptrend + median(ptrend) - ttrend + median(ttrend)
        cflux /= nanmedian(cflux)

        ## Iterative sigma-clipping
        ## ------------------------
        info('Starting initial outlier detection')
        fmask = isfinite(cflux)
        omask = fmask.copy()
        i, nm = 0, None
        # Iterate until the mask stops changing (or at most 10 rounds),
        # clipping points more than 5 sigma from unity.
        while nm != omask.sum() and i < 10:
            nm = omask.sum()
            _, sigma = medsig(cflux[omask])
            omask[fmask] &= (cflux[fmask] < 1 + 5 * sigma) & (cflux[fmask] >
                                                              1 - 5 * sigma)
            i += 1
        masks.append(fmask)
        ofrac = (~omask).sum() / omask.size
        # Apply the outlier mask only when the flagged fraction looks sane.
        if ofrac < 0.25:
            masks[-1] &= omask
            info(' Flagged %i (%4.1f%%) outliers.', (~omask).sum(), ofrac)
        else:
            info(' Found %i (%4.1f%%) outliers. Not flagging..',
                 (~omask).sum(), ofrac)

        ## Lomb-Scargle period search
        ## --------------------------
        info('Starting Lomb-Scargle period search')
        mask = masks[-1]
        nflux = flux - ptrend + nanmedian(ptrend)
        ntime = ds.time - ds.time.mean()
        # Remove a 9th-order polynomial baseline before the period search.
        pflux = np.poly1d(np.polyfit(ntime[mask], nflux[mask], 9))(ntime)
        period, fap = psearch(ds.time[mask], (nflux - pflux)[mask],
                              args.ls_min_period, args.ls_max_period)
        if fap < 1e-50:
            ds.is_periodic = True
            ds.ls_fap = fap
            ds.ls_period = period

    ## Kernel selection
    ## ----------------
    # NOTE(review): this hard-coded override forces the basic kernel and
    # makes the automatic-selection branch below dead code -- looks like a
    # debugging leftover; confirm before removing.
    args.kernel = 'basic'
    if args.kernel:
        info(
            'Overriding automatic kernel selection, using %s kernel as given in the command line',
            args.kernel)
        if 'periodic' in args.kernel and not args.kernel_period:
            logger.critical(
                'Need to give period (--kernel-period) if overriding automatic kernel detection with a periodic kernel. Quitting.'
            )
            exit(1)
        kernel = kernels[args.kernel](period=args.kernel_period)
    else:
        info(' Using %s position kernel', args.default_position_kernel)
        if ds.is_periodic:
            info(
                ' Found periodicity p = {:7.2f} (fap {:7.4e} < 1e-50), will use a quasiperiodic kernel'
                .format(ds.ls_period, ds.ls_fap))
        else:
            info(' No strong periodicity found, using a basic kernel')
        if args.default_position_kernel.lower() == 'sqrexp':
            kernel = QuasiPeriodicKernel(
                period=ds.ls_period) if ds.is_periodic else BasicKernel()
        else:
            kernel = QuasiPeriodicKernelEP(
                period=ds.ls_period) if ds.is_periodic else BasicKernelEP()

    ## Detrending
    ## ----------
    for iset in range(ds.nsets):
        if ds.nsets > 1:
            logger.name = 'Worker {:d} <{:d}-{:d}>'.format(
                mpi_rank, dataset.epic, iset + 1)
        np.random.seed(args.seed)
        tstart = time()
        inputs = np.transpose([ds.time, ds.x, ds.y])
        detrender = Detrender(ds.fluxes[iset],
                              inputs,
                              mask=masks[iset],
                              splits=args.splits,
                              kernel=kernel,
                              tr_nrandom=args.tr_nrandom,
                              tr_nblocks=args.tr_nblocks,
                              tr_bspan=args.tr_bspan)
        de = DiffEvol(detrender.neglnposterior, kernel.bounds, args.de_npop)

        ## Period population generation
        ## ----------------------------
        # Seed the DE population's period parameter (column 2) around the
        # Lomb-Scargle estimate, clipped to the allowed search range.
        if isinstance(kernel, QuasiPeriodicKernel):
            de._population[:, 2] = np.clip(
                normal(kernel.period, 0.1 * kernel.period, size=de.n_pop),
                args.ls_min_period, args.ls_max_period)

        ## Global hyperparameter optimisation
        ## ----------------------------------
        info('Starting global hyperparameter optimisation using DE')
        tstart_de = time()
        for i, r in enumerate(de(args.de_niter)):
            info(' DE iteration %3i -ln(L) %4.1f', i, de.minimum_value)
            tcur_de = time()
            # Stop early once the population has converged (fitness spread
            # < 3) or the time budget is spent, after at least 3 iterations.
            if ((de._fitness.ptp() < 3) or
                (tcur_de - tstart_de > args.de_max_time)) and (i > 2):
                break
        info(' DE finished in %i seconds', tcur_de - tstart_de)
        info(
            ' DE minimum found at: %s',
            np.array_str(de.minimum_location, precision=3, max_line_width=250))
        info(' DE -ln(L) %4.1f', de.minimum_value)

        ## Local hyperparameter optimisation
        ## ---------------------------------
        info('Starting local hyperparameter optimisation')
        try:
            with warnings.catch_warnings():
                warnings.filterwarnings('ignore',
                                        category=RuntimeWarning,
                                        append=True)
                pv, warn = detrender.train(de.minimum_location)
        except ValueError as e:
            logger.error('Local optimiser failed, %s', e)
            logger.error('Skipping the file')
            # NOTE(review): returns None here while the success path returns
            # (dataset, results) -- callers must handle both shapes.
            return
        info(' Local minimum found at: %s', np.array_str(pv, precision=3))

        ## Trend computation
        ## -----------------
        # Split each predicted trend into its median level (mt/mp) and the
        # zero-centred variation (tt/tp).
        (mt, tt), (mp, tp) = map(lambda a: (nanmedian(a), a - nanmedian(a)),
                                 detrender.predict(pv, components=True))

        ## Iterative sigma-clipping
        ## ------------------------
        info('Starting final outlier detection')
        flux = detrender.data.unmasked_flux
        cflux = flux - tp - tt
        cflux /= nanmedian(cflux)
        fmask = isfinite(cflux)
        mhigh = zeros_like(fmask)
        mlow = zeros_like(fmask)
        mask = fmask.copy()
        i, nm = 0, None
        # Iterate until convergence, separating upward and downward outliers.
        while nm != mask.sum() and i < 10:
            nm = mask.sum()
            _, sigma = medsig(cflux[mask])
            mhigh[fmask] = cflux[fmask] > 1 + 5 * sigma
            mlow[fmask] = cflux[fmask] < 1 - 5 * sigma
            mask &= fmask & (~mlow) & (~mhigh)
            i += 1
        # Record the per-point quality flags.
        ds.mflags[iset][~fmask] |= M_NOTFINITE
        ds.mflags[iset][mhigh] |= M_OUTLIER_U
        ds.mflags[iset][mlow] |= M_OUTLIER_D
        info(' %5i too high', mhigh.sum())
        info(' %5i too low', mlow.sum())
        info(' %5i not finite', (~fmask).sum())

        ## Detrending and CDPP computation
        ## -------------------------------
        # CDPP for the raw flux, the position-corrected flux, and the fully
        # corrected flux.
        info('Computing time and position trends')
        dd = detrender.data
        cdpp_r = cdpp(dd.masked_time, dd.masked_flux)
        cdpp_t = cdpp(dd.unmasked_time, dd.unmasked_flux - tp,
                      exclude=~dd.mask)
        cdpp_c = cdpp(dd.unmasked_time, dd.unmasked_flux - tp - tt,
                      exclude=~dd.mask)
        results.append(
            Result(detrender, pv, tt + mt, tp + mp, cdpp_r, cdpp_t, cdpp_c,
                   warn))
        info(' CDPP - raw - %6.3f', cdpp_r)
        info(' CDPP - position component removed - %6.3f', cdpp_t)
        info(' CDPP - full reduction - %6.3f', cdpp_c)
        info('Detrending time %6.3f', time() - tstart)
    info('Finished')
    return dataset, results
def rebin_err(t, f, ferr=None, dt=0.02, phasefolded=False, ferr_type='medsig', ferr_style='std', sigmaclip=False):
    """
    @written by Ed Gillen, extended by Maximilian N. Guenther

    Rebin a time series into regular bins of width ``dt``, also dealing
    with errors on the individual data points being binned.

    Parameters
    ----------
    t, f : arrays
        Time and flux values (``f`` may be a masked array).
    ferr : array, optional
        Per-point flux uncertainties; when given, each bin is the
        inverse-error weighted average computed by ``weighted_avg_and_std``.
    dt : float
        Bin width, in the units of ``t``.
    phasefolded : bool
        When True, bin over the fixed phase range [-0.25, 0.75) and return
        every bin (NaN where empty); otherwise bin [t.min(), t.max()) and
        return only finite bins.
    ferr_type : 'medsig' or 'meanstd'
        Per-bin statistic used when ``ferr`` is not given.
    ferr_style : 'std' or 'sem'
        'sem' = std / sqrt(N) (applies to the 'meanstd' branch).
    sigmaclip : bool
        When True, 5-sigma clip ``f`` first (best effort).

    Returns
    -------
    treg, freg, freg_err, N : arrays
        Bin centres (mean time per bin), bin values, bin errors, and the
        number of points per bin.
    """
    #::: sigma clip (best effort; sigma_clip may be unavailable or fail).
    # Narrowed from a bare 'except:' so KeyboardInterrupt/SystemExit are
    # no longer swallowed; the deliberate best-effort behaviour is kept.
    if sigmaclip is True:
        try:
            f = sigma_clip(f, sigma=5, iters=3)
        except Exception:
            pass
    #::: make masked values NaNs if f is a masked array (plain ndarrays
    #::: have no .mask and fall through)
    try:
        f[f.mask] = np.nan
    except Exception:
        pass
    #::: bin
    #::: detect if it's phase-folded data or not
    if phasefolded is False:
        treg = np.r_[t.min():t.max():dt]
    else:
        treg = np.r_[-0.25:0.75:dt]
    nreg = len(treg)
    freg = np.zeros(nreg) + np.nan
    freg_err = np.zeros(nreg) + np.nan
    N = np.zeros(nreg)
    for i in np.arange(nreg):
        l = (t >= treg[i]) * (t < treg[i] + dt)
        if l.any():
            treg[i] = np.nanmean(t[l])
            N[i] = len(t[l])
            # BUG FIX: 'ferr == None' broadcasts elementwise when ferr is an
            # ndarray and then raises "truth value is ambiguous", making the
            # weighted branch unreachable; identity comparison is correct.
            if ferr is None:
                if ferr_type == 'medsig':
                    freg[i], freg_err[i] = medsig(f[l])
                else:
                    try:
                        freg[i] = np.nanmean(f[l])
                        freg_err[i] = np.nanstd(f[l])
                    except Exception:  # e.g. an empty or completely masked array
                        freg[i] = np.nan
                        freg_err[i] = np.nan
                    if ferr_style == 'sem':
                        freg_err[i] /= np.sqrt(len(f[l]))
            else:
                freg[i], freg_err[i] = weighted_avg_and_std(
                    f[l], np.ma.array([1 / float(x) for x in ferr[l]]))
    if phasefolded is False:
        k = np.isfinite(freg)  # only return finite bins
    else:
        k = slice(None)  # return the entire phase, filled with NaN replacements
    return treg[k], freg[k], freg_err[k], N[k]