def test_remove_sines_iteratively(a, b, c, d):
    """Check that two superimposed sinusoids are removed down to the noise.

    The synthetic light curve is a constant level of 20 with Gaussian noise
    (sigma = 0.01) plus ``a * sin(b * x) + c * sin(d * x)``. After running
    ``remove_sines_iteratively`` the detrended flux must scatter like the
    injected noise (within 10 %) and stay within 0.2 of the constant level.

    Parameters:
    -----------
    a, c : float
        amplitudes of the two sinusoids
    b, d : float
        angular frequencies of the two sinusoids
        (presumably supplied by a parametrize decorator outside this
        block -- confirm)
    """
    # seed numpy random so that the noise realization, and with it the
    # outcome of the tolerance-based assertions, is the same on every run
    np.random.seed(42)

    # define light curve with two sinusoidal modulations
    x = np.linspace(10, 40, 1200)
    y1 = (20. + np.random.normal(0, .01, 1200)
          + a * np.sin(b * x) + c * np.sin(d * x))

    flc = FlareLightCurve(time=x, flux=y1, flux_err=np.full_like(y1, .01))
    flc.detrended_flux = y1
    flc.detrended_flux_err = np.full_like(y1, .01)

    # find median
    flc = find_iterative_median(flc)

    # apply function
    flcd = remove_sines_iteratively(flc)

    # do some checks
    assert flcd.detrended_flux.std() == pytest.approx(0.01, rel=1e-1)
    assert flcd.detrended_flux.max() < 20.2
    assert flcd.detrended_flux.min() > 19.8
def test_remove_exponential_fringes(a, b, median, c, d):
    """Check that exponential fringes are removed while a flare survives.

    Builds a light curve of 1200 points consisting of a constant ``median``
    level, one exponential term growing with time, one exponential term
    decaying with time, Gaussian noise, and a short linear ramp of 5 %
    amplitude at indices 800-809. After ``remove_exponential_fringes`` the
    part before the ramp must scatter like the injected noise, the ramp
    maximum must be preserved, and no value may drop below 99.5 % of
    ``median``.

    Parameters:
    -----------
    a, b : float
        amplitude and time offset of the term growing with time
    median : float
        constant flux level
    c, d : float
        amplitude and time offset of the term decaying with time
        (all presumably supplied by a parametrize decorator outside this
        block -- confirm)
    """
    # seed numpy random to exclude outliers
    np.random.seed(42)

    n_points = 1200
    noise_level = .0005 * median

    # assemble the light curve term by term
    t = np.linspace(10, 40, n_points)
    growing = a * np.exp(-1 * (b - t) * 2)
    decaying = c * np.exp((d - t) * 2)
    noise = np.random.normal(0, noise_level, n_points)
    flux = growing + median + decaying + noise

    # overwrite ten points with a linearly declining 5 % excursion
    flux[800:810] = median + median * .05 * np.linspace(1, 0, 10)

    # define lightcurve
    flc = FlareLightCurve(time=t,
                          flux=flux,
                          flux_err=np.full_like(flux, noise_level))
    flc.detrended_flux = flux
    flc.detrended_flux_err = np.full_like(flux, noise_level)

    # get iterative median, then run the function under test
    flc = find_iterative_median(flc)
    flcd = remove_exponential_fringes(flc)

    # do some checks
    quiet_part = flcd.detrended_flux[:799]
    assert quiet_part.std() == pytest.approx(noise_level, rel=1e-1)
    assert flcd.detrended_flux.max() == pytest.approx(median * 1.05)
    assert flcd.detrended_flux.min() > median * 0.995
def refine_detrended_flux_err(flcd, mask_pos_outliers_sigma=2.5,
                              std_rolling_window_length=15, pad=25):
    """Attempt to recover a good estimate of the light curve noise.

    Start out from a simple standard deviation of the flux.
    Then filter out outliers above `mask_pos_outliers_sigma`.
    Apply rolling window standard deviation on the filtered array.
    Calculate a mean standard deviation from the result.
    Fill in this mean into the masked values and into `pad` data
    points on either side of them.

    Note that `flcd.detrended_flux_err` is overwritten in place.

    Parameters:
    -----------
    flcd : de-trended FlareLightCurve
    mask_pos_outliers_sigma : float
        sigma value above which to mask positive outliers
    std_rolling_window_length : int
        rolling window length for standard deviation calculation
    pad : int
        How many values to pad-mask around positive outliers.

    Return:
    --------
    FlareLightCurve with refined `detrended_flux_err` attribute.
    """
    # start with a first approximation to std
    flcd.detrended_flux_err[:] = np.nanstd(flcd.detrended_flux)

    # and refine it:
    flcd = find_iterative_median(flcd)

    filtered = copy.deepcopy(flcd.detrended_flux)

    # mask strong positive outliers so that they don't add to std
    is_outlier = (flcd.detrended_flux - flcd.it_med
                  > mask_pos_outliers_sigma * flcd.detrended_flux_err)
    filtered[is_outlier] = np.nan

    # apply rolling window std
    flcd.detrended_flux_err[:] = pd.Series(filtered).rolling(
        std_rolling_window_length, min_periods=1).std()

    # mean value that replaces every entry where the calculation failed
    meanstd = np.nanmean(flcd.detrended_flux_err)

    # pad the excluded values not to create spikes of high error
    # around flares
    invalid = np.invert(np.isfinite(flcd.detrended_flux_err))
    seeds = np.where(invalid)[0]
    last = len(invalid) - 1
    for shift in range(-pad, pad + 1):
        # clamp at BOTH ends: a negative index would otherwise wrap
        # around and flag samples at the end of the light curve
        neighbours = np.clip(seeds + shift, 0, last)
        invalid[neighbours] = True

    flcd.detrended_flux_err[invalid] = meanstd

    return flcd
def test_custom_detrending(a1, a2, period1, period2, quad, cube):
    """Run the detrending chain on a synthetic light curve and check flares.

    A light curve is generated with a fixed uncertainty of 15, detrended
    with ``custom_detrending``, its noise re-estimated with
    ``estimate_detrended_noise``, and flares are searched for. The test
    then checks the median recovered uncertainty, the start/stop indices
    of three flares, and their recovered equivalent durations and
    amplitudes.

    Parameters:
    -----------
    a1, a2, period1, period2, quad, cube :
        forwarded unchanged to ``generate_lightcurve``
        (presumably supplied by a parametrize decorator outside this
        block -- confirm)
    """
    # fix uncertainty
    errorval = 15.

    # fix the random state before the light curve is generated
    np.random.seed(40)
    lc = generate_lightcurve(errorval, a1, a2, period1, period2, quad, cube)

    # de-trend, re-estimate the noise, and refresh the iterative median
    detrended = custom_detrending(lc)
    with_noise = estimate_detrended_noise(detrended,
                                          mask_pos_outliers_sigma=2.5,
                                          std_window=100)
    with_noise = find_iterative_median(with_noise)

    flares = with_noise.find_flares(addtail=True).flares
    print(flares)

    # check that the recovered uncertainty is close to the injected one
    recovered_err = np.nanmedian(with_noise.detrended_flux_err)
    assert recovered_err == pytest.approx(errorval, abs=2)

    # expected flare boundaries, keyed by row label 0, 1, 2
    expected = pd.DataFrame({
        'istart': dict(enumerate([5280, 13160, 23160])),
        'istop': dict(enumerate([5346, 13163, 23175])),
    })
    bounds = ["istart", "istop"]
    assert (flares[bounds] == expected[bounds]).all().all()

    expected_ed = np.array([802.25, 4.7907, 40.325])
    assert (flares.ed_rec.values == pytest.approx(expected_ed, rel=0.2))

    expected_ampl = np.array([0.28757, 0.03004, 0.064365])
    assert (flares.ampl_rec.values == pytest.approx(expected_ampl, rel=0.25))
def iteratively_remove_sines(flcd, freq_unit=1 / u.day, maximum_frequency=10,
                             minimum_frequency=0.05):
    """Repeatedly fit and subtract a cosine from each light curve chunk.

    For every chunk returned by ``find_gaps`` a periodogram is computed,
    a cosine is least-squares fitted starting from the frequency at
    maximum power, and the fit is subtracted while the mean flux level is
    added back. This is repeated at most 10 times per chunk, as long as
    the maximum power of the flattened periodogram exceeds 1. The
    periodogram is computed before the fit is subtracted, so the
    value that controls the next iteration describes the flux as it was
    before the latest subtraction.

    The result is written into ``flcd.detrended_flux`` in place.
    Intermediate values are printed to stdout.

    Parameters:
    ------------
    flcd : FlareLightCurve
        light curve from which to remove sinusoidal signal
    freq_unit : astropy.units
        unit in which maximum_frequency and minimum_frequency are given
    maximum_frequency : float
        highest frequency passed to the periodogram
    minimum_frequency : float
        lowest frequency passed to the periodogram

    Return:
    -------
    The input FlareLightCurve with updated detrended_flux attribute
    """

    def cosine(x, a, b, c, d):
        return a * np.cos(b * x + c) + d

    # identical settings are used for every periodogram below
    pg_settings = dict(freq_unit=freq_unit,
                       maximum_frequency=maximum_frequency,
                       minimum_frequency=minimum_frequency)

    # work on a copy; only detrended_flux of the input is touched
    working_copy = copy.deepcopy(flcd)

    for le, ri in working_copy.find_gaps().gaps:
        chunk = find_iterative_median(copy.deepcopy(working_copy[le:ri]))

        # initial signal estimate for this chunk
        pg = chunk.remove_nans().to_periodogram(**pg_settings)
        snr = pg.flatten().max_power
        print("SNR at ", snr)

        for _ in range(10):
            if not snr > 1.:
                break

            pg = chunk.remove_nans().to_periodogram(**pg_settings)

            # fit only where both time and flux are defined
            usable = ~(np.isnan(chunk.time) | np.isnan(chunk.flux))
            start_values = [
                np.nanstd(chunk.flux),
                2 * np.pi * pg.frequency_at_max_power.value,
                0,
                np.nanmean(chunk.flux),
            ]
            p, _cov = optimize.curve_fit(cosine,
                                         chunk.time[usable],
                                         chunk.flux[usable],
                                         p0=start_values)

            # subtract the fit but keep the mean flux level
            model = cosine(chunk.time, p[0], p[1], p[2], p[3])
            chunk.flux = np.nanmean(chunk.flux) + chunk.flux - model

            print(snr)
            snr = pg.flatten().max_power
            print(snr)

        flcd.detrended_flux[le:ri] = chunk.flux

    return flcd
def estimate_detrended_noise(flc, mask_pos_outliers_sigma=2.5,
                             std_window=100):
    """Estimate the noise of the de-trended flux with a rolling std.

    Works on a deep copy of `flc`, chunk by chunk as given by
    ``find_gaps``. In each chunk, data points for which ``sigma_clip``
    returns False are set to NaN, a centered rolling standard deviation
    is computed, and gaps in the result are interpolated. The procedure
    is run twice per chunk; the second pass fills `detrended_flux_err`
    of the returned light curve.

    Parameters:
    -----------
    flc : FlareLightCurve
        light curve with `detrended_flux` and `detrended_flux_err`
    mask_pos_outliers_sigma : float
        passed to ``sigma_clip`` as `max_sigma`
    std_window : int
        rolling window length for the standard deviation

    Return:
    -------
    Copy of the FlareLightCurve with updated `detrended_flux_err`
    """

    def rolling_std(values):
        # centered rolling std; NaNs in the result are interpolated
        windows = pd.Series(values).rolling(std_window,
                                            center=True,
                                            min_periods=1)
        return windows.std().interpolate()

    result = copy.deepcopy(flc).find_gaps()

    for le, ri in result.gaps:
        chunk = copy.deepcopy(result[le:ri])

        # first pass: drop clipped points from the chunk's flux itself
        keep = sigma_clip(chunk.detrended_flux.value,
                          max_sigma=mask_pos_outliers_sigma,
                          longdecay=2)
        chunk.detrended_flux[~keep] = np.nan
        chunk.detrended_flux_err[:] = rolling_std(chunk.detrended_flux.value)

        # and refine it:
        chunk = find_iterative_median(chunk)

        # second pass on a separate copy of the flux values
        clipped = copy.deepcopy(chunk.detrended_flux.value)
        keep = sigma_clip(clipped,
                          max_sigma=mask_pos_outliers_sigma,
                          longdecay=2)
        clipped[~keep] = np.nan

        result.detrended_flux_err[le:ri] = rolling_std(clipped)

    return result
def custom_detrending(flc):
    """Wrapper that chains the individual de-trending steps.

    Steps, in order:

    1. If the first and last finite flux values differ by more than 20 %
       of the median flux, fit a coarse spline and adopt its result.
    2. Remove sinusoidal signals with ``iteratively_remove_sines``.
    3. Subtract a centered rolling median over 300 data points and add
       the overall median back.
    4. Drop data points with non-finite flux.
    5. Estimate the noise with ``refine_detrended_flux_err``.

    Parameters:
    -----------
    flc : FlareLightCurve
        light curve to de-trend; its `flux` is overwritten in place

    Return:
    -------
    FlareLightCurve with refined `detrended_flux_err` attribute
    """
    finite_flux = flc.flux[np.isfinite(flc.flux)]
    relative_drift = (np.abs(finite_flux[0] - finite_flux[-1])
                      / np.median(finite_flux))
    if relative_drift > .2:
        print("Do a coarse spline interpolation to remove trends.")
        flc = fit_spline(flc, spline_coarseness=12)
        flc.flux[:] = flc.detrended_flux[:]

    # iteratively remove sinusoidal signals
    flc = iteratively_remove_sines(flc)
    flc.flux[:] = flc.detrended_flux[:]

    # subtract a rolling median but keep the overall flux level
    rolling_median = pd.Series(flc.flux).rolling(300, center=True).median()
    flc.flux[:] = flc.flux - rolling_median + np.nanmedian(flc.flux)

    flc = find_iterative_median(flc)

    # The returned window length is not used because the Savitzky-Golay
    # steps are disabled; the call itself is kept as it was.
    search_gaps_for_window_length(flc)

    flc = flc[np.isfinite(flc.flux)]

    # determine the noise properties with a rolling std, padding masked
    # outliers/candidates
    return refine_detrended_flux_err(flc,
                                     mask_pos_outliers_sigma=1.5,
                                     std_rolling_window_length=15,
                                     pad=25)
def remove_sines_iteratively(flcd, niter=5, freq_unit=1 / u.day,
                             maximum_frequency=12., minimum_frequency=0.2,
                             max_sigma=3.5, longdecay=2):
    """Iteratively remove strong sinusoidal signal from light curve.

    Each iteration calculates a Lomb-Scargle periodogram and LSQ-fits
    a cosine function using the dominant frequency as starting point.

    The result is written into `flcd.detrended_flux` in place. Chunks
    that are not longer than one full period of the fastest frequency
    are copied over without any sine removal.

    Parameters:
    ------------
    flcd : FlareLightCurve
        light curve from which to remove sinusoidal signal
    niter : int
        Maximum number of iterations.
    freq_unit : astropy.units
        unit in which maximum_frequency and minimum_frequency are given
    maximum_frequency : float
        highest frequency to calculate the Lomb-Scargle periodogram
    minimum_frequency : float
        lowest frequency to calculate the Lomb-Scargle periodogram
    max_sigma : float
        Passed to altaipony.utils.sigma_clip. Above this value data
        points are flagged as outliers.
    longdecay : int
        Passed to altaipony.utils.sigma_clip, which expands the mask for
        series of outliers by sqrt(length of series). Longdecay doubles
        the mask expansion in the decay phase of what may be flares.

    Return:
    -------
    FlareLightCurve with detrended_flux attribute
    """

    # define cosine function
    def cosine(x, a, b, c, d):
        return a * np.cos(b * x + c) + d

    # make a copy of the original LC
    flct = copy.deepcopy(flcd)

    # iterate over chunks
    for le, ri in flct.find_gaps().gaps:

        # again make a copy of the chunk to manipulate safely
        flc = copy.deepcopy(flct[le:ri])

        # find median of LC
        flc = find_iterative_median(flc)

        # mask flares, honouring the caller's clipping settings
        mask = sigma_clip(flc.flux.value,
                          max_sigma=max_sigma,
                          longdecay=longdecay)

        # how many data points comprise the fastest period at
        # maximum_frequency?
        full_fastest_period = 1. / maximum_frequency / np.nanmin(
            np.diff(flc.remove_nans().time.value))

        # only remove sines if LC chunk is larger than one full period
        # of the fastest frequency
        if flc.flux.value.shape[0] > full_fastest_period:

            n = 0  # start counter
            snr = 3  # go into while loop at least once

            # iterate while there is signal, but not more than niter times
            while (snr > 1) and (n < niter):

                # mask NaNs and outliers
                cond = (np.invert(np.isnan(flc.time.value))
                        & np.invert(np.isnan(flc.flux.value))
                        & mask)

                # calculate periodogram
                pg = flc[cond].to_periodogram(
                    freq_unit=freq_unit,
                    maximum_frequency=maximum_frequency,
                    minimum_frequency=minimum_frequency)

                # fit sinusoidal
                p, p_cov = optimize.curve_fit(
                    cosine,
                    flc.time.value[cond],
                    flc.flux.value[cond],
                    p0=[
                        np.nanstd(flc.flux.value),
                        2 * np.pi * pg.frequency_at_max_power.value,
                        0,
                        np.nanmean(flc.flux.value),
                    ],
                    ftol=1e-6)

                # replace with de-trended flux but without subtracting
                # the median (offset d is set to 0)
                flc.flux = flc.flux.value - cosine(flc.time.value,
                                                   p[0], p[1], p[2], 0.)

                # update SNR; pg was computed before this subtraction
                snr = pg.flatten().max_power

                # bump iterator
                n += 1

        # replace the empty array with the fitted detrended flux
        flcd.detrended_flux[le:ri] = flc.flux.value

    return flcd
def custom_detrending(lc, spline_coarseness=30, spline_order=3,
                      savgol1=6., savgol2=3., pad=6, max_sigma=2.5,
                      remove_exp_fringe=True):
    """Custom de-trending for TESS and Kepler short cadence light curves,
    including TESS Cycle 3 20s cadence.

    The light curve is normalized, then a coarse spline, iterative sine
    removal, two Savitzky-Golay passes and (optionally) an exponential
    fringe removal are applied in this order. Every stage is drawn, with
    increasing vertical offset, into a diagnostic plot.

    Parameters:
    ------------
    lc : FlareLightCurve
        light curve that has at least time, flux and flux_err
    spline_coarseness : float
        time scale in hours for spline points.
        See fit_spline for details.
    spline_order : int
        Spline order for the coarse spline fit.
        Default is cubic spline.
    savgol1 : float
        Window size for first Savitzky-Golay filter application.
        Unit is hours, defaults to 6 hours.
    savgol2 : float
        Window size for second Savitzky-Golay filter application.
        Unit is hours, defaults to 3 hours.
    pad : int
        Outliers in Savitzky-Golay filter are padded with this
        number of data points. Defaults to 6.
    max_sigma : float
        sigma value at which to cap outliers and flare candidates.
        Default is 2.5. Choose 1.5 for very active stars.
    remove_exp_fringe : bool
        removes un-detrended fringes in the light curve
        with an exponential function. Default is True.

    Return:
    -------
    lc5 : FlareLightCurve with detrended_flux attribute, NaNs removed
    ax : matplotlib axes holding the diagnostic plot
    """
    # mean cadence in the unit of lc.time.value
    dt = np.mean(np.diff(lc.time.value))

    def odd_window_length(hours):
        # number of cadences in `hours`, forced to be odd; the division
        # by 24 assumes that time is given in days -- TODO confirm
        return int((np.rint(hours / 24. / dt) // 2) * 2 + 1)

    # diag plot init; all curves are drawn on `ax` explicitly so that the
    # returned axes is the one that holds them, regardless of what the
    # helpers below do to pyplot's current figure
    _, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))

    lc = lc.normalize()

    # vertical spacing between stages; NaN-safe so that missing flux
    # values do not turn every shifted curve into NaN
    offset = np.nanmean(lc.flux.value) - np.nanmin(lc.flux.value)

    ax.plot(lc.time.value, lc.flux.value, c="c",
            label="original light curve")

    # fit a spline to the general trends
    lc1, model = fit_spline(lc, spline_order=spline_order,
                            spline_coarseness=spline_coarseness)

    # replace for next step
    lc1.flux = lc1.detrended_flux.value

    # diag plot
    ax.plot(lc1.time.value, model, c="r", label="rough trends")
    ax.plot(lc1.time.value, lc1.detrended_flux.value + 1 * offset,
            c="yellow", label="rough trends removed")

    # remove strong and fast sinusoidal variability, using the default
    # settings of remove_sines_iteratively
    lc2 = remove_sines_iteratively(lc1)

    # diag plot
    ax.plot(lc2.time.value, lc2.detrended_flux.value + 2 * offset,
            c="grey", label="sines removed")

    # use Savitzky-Golay with the first (longer) window to iron out the rest
    # NOTE(review): `pad` is forwarded as detrend_savgol's `longdecay`
    # argument -- confirm that this mapping is intended
    lc3 = detrend_savgol(lc2, max_sigma=max_sigma, longdecay=pad,
                         w=odd_window_length(savgol1))

    # and once more with the second (shorter) window
    lc4 = detrend_savgol(lc3, max_sigma=max_sigma, longdecay=pad,
                         w=odd_window_length(savgol2))

    # diag plot
    ax.plot(lc4.time.value, lc4.flux.value + 3 * offset,
            c="k", label="SavGol applied")

    # find median value
    lc4.detrended_flux = lc4.flux
    lc4.detrended_flux_err = lc4.flux_err
    lc4 = find_iterative_median(lc4)

    # remove exponential fringes that neither spline,
    # nor sines, nor SavGol can remove.
    if remove_exp_fringe:
        lc5 = remove_exponential_fringes(lc4.remove_nans())
    else:
        lc5 = lc4.remove_nans()

    ax.plot(lc5.time.value, lc5.detrended_flux.value + 4 * offset,
            c="magenta", label="expfunc applied")

    return lc5, ax