예제 #1
0
def test_remove_sines_iteratively(a, b, c, d):
    """Check that two superimposed sinusoids are removed from a light curve.

    A synthetic light curve with a constant level of 20, Gaussian noise
    (sigma = 0.01) and the modulation ``a * sin(b * t) + c * sin(d * t)``
    is passed through ``remove_sines_iteratively``. Afterwards the
    de-trended flux must be consistent with pure noise around the
    constant level.

    Parameters:
    -----------
    a, c : float
        amplitudes of the two sinusoids
    b, d : float
        angular frequencies of the two sinusoids
    """
    n_points = 1200
    noise_sigma = .01

    # synthetic light curve: constant level + noise + two sinusoids
    time_grid = np.linspace(10, 40, n_points)
    noise = np.random.normal(0, noise_sigma, n_points)
    flux = 20. + noise + a * np.sin(b * time_grid) + c * np.sin(d * time_grid)

    light_curve = FlareLightCurve(
        time=time_grid,
        flux=flux,
        flux_err=np.full_like(flux, noise_sigma),
    )
    # the function under test works on the de-trended attributes
    light_curve.detrended_flux = flux
    light_curve.detrended_flux_err = np.full_like(flux, noise_sigma)

    # the iterative median has to be computed before removing the sines
    light_curve = find_iterative_median(light_curve)

    result = remove_sines_iteratively(light_curve)
    residual = result.detrended_flux

    # scatter must be back at the injected noise level (within 10 %) ...
    assert residual.std() == pytest.approx(0.01, rel=1e-1)
    # ... and no sample may stray far from the constant level
    assert residual.max() < 20.2
    assert residual.min() > 19.8
예제 #2
0
def test_remove_exponential_fringes(a, b, median, c, d):
    """Check that exponential fringes are removed while a flare survives.

    The synthetic light curve is the sum of a constant level ``median``,
    an exponential term growing with time, ``a * exp(2 * (t - b))``,
    an exponential term decaying with time, ``c * exp(2 * (d - t))``,
    and Gaussian noise with sigma ``0.0005 * median``. Samples 800-809
    are overwritten with a linearly decaying bump of 5 % amplitude.

    Parameters:
    -----------
    a, c : float
        amplitudes of the two exponential terms
    b, d : float
        time offsets of the two exponential terms
    median : float
        constant flux level of the light curve
    """
    # fixed seed so that the noise realisation contains no outliers
    np.random.seed(42)

    n_points = 1200
    time_grid = np.linspace(10, 40, n_points)

    growing = a * np.exp(-1 * (b - time_grid) * 2)
    decaying = c * np.exp((d - time_grid) * 2)
    noise = np.random.normal(0, .0005 * median, n_points)
    flux = growing + median + decaying + noise

    # inject a bump that starts at +5 % and decays linearly to zero
    flux[800:810] = median + median * .05 * np.linspace(1, 0, 10)

    light_curve = FlareLightCurve(time=time_grid,
                                  flux=flux,
                                  flux_err=np.full_like(flux, .0005 * median))
    light_curve.detrended_flux = flux
    light_curve.detrended_flux_err = np.full_like(flux, .0005 * median)

    # the iterative median is required by the fringe removal
    light_curve = find_iterative_median(light_curve)

    result = remove_exponential_fringes(light_curve)
    residual = result.detrended_flux

    # noise level before the bump is recovered within 10 %
    assert residual[:799].std() == pytest.approx(.0005 * median, rel=1e-1)
    # the bump keeps its 5 % peak
    assert residual.max() == pytest.approx(median * 1.05)
    # nothing dips noticeably below the constant level
    assert residual.min() > median * 0.995
def refine_detrended_flux_err(flcd,
                              mask_pos_outliers_sigma=2.5,
                              std_rolling_window_length=15,
                              pad=25):
    """Attempt to recover a good estimate of the light curve noise.
    Start out from a simple standard deviation of the flux.
    Then filter out outliers above `mask_pos_outliers_sigma`.
    Apply rolling window standard deviation on the filtered array.
    Calculate a mean standard deviation from the result.
    Fill in this mean into the masked values and into `pad` samples
    on either side of each of them.

    Note that `flcd.detrended_flux_err` is overwritten in place.

    Parameters:
    -----------
    flcd : de-trended FlareLightCurve

    mask_pos_outliers_sigma : float
        sigma value above which to mask positive outliers
    std_rolling_window_length : int
        rolling window length for standard deviation calculation
    pad : int
        How many values to pad-mask around positive outliers.

    Return:
    --------
    FlareLightCurve with refined `detrended_flux_err` attribute.

    """

    # start with a first approximation to std
    flcd.detrended_flux_err[:] = np.nanstd(flcd.detrended_flux)

    # and refine it:
    flcd = find_iterative_median(flcd)

    filtered = copy.deepcopy(flcd.detrended_flux)

    # mask strong positive outliers so that they don't add to std
    outliers = (flcd.detrended_flux - flcd.it_med >
                mask_pos_outliers_sigma * flcd.detrended_flux_err)
    filtered[outliers] = np.nan

    # apply rolling window std
    flcd.detrended_flux_err[:] = pd.Series(filtered).rolling(
        std_rolling_window_length, min_periods=1).std()

    # mean of the valid rolling std values, used as the fill value below
    meanstd = np.nanmean(flcd.detrended_flux_err)

    # pad the excluded values not to create spikes of high error around flares
    invalid = np.invert(np.isfinite(flcd.detrended_flux_err))
    seeds = np.where(invalid)[0]
    last = len(invalid) - 1
    for offset in range(-pad, pad + 1):
        # Clip at BOTH ends: without the lower bound, negative indices
        # wrap around and mask unrelated samples at the end of the array
        # (or raise IndexError if the array is shorter than `pad`).
        invalid[np.clip(seeds + offset, 0, last)] = True

    flcd.detrended_flux_err[invalid] = meanstd

    return flcd
예제 #4
0
def test_custom_detrending(
    a1,
    a2,
    period1,
    period2,
    quad,
    cube,
):
    """End-to-end check of the de-trending and flare-finding chain.

    A synthetic light curve is generated with a fixed uncertainty,
    de-trended with ``custom_detrending``, its noise is re-estimated
    with ``estimate_detrended_noise`` and flares are searched for.
    The recovered noise level, the flare start/stop indices, the
    recovered equivalent durations and the recovered amplitudes are
    compared against reference values.

    Parameters:
    -----------
    a1, a2, period1, period2, quad, cube :
        forwarded unchanged to ``generate_lightcurve``
    """
    # injected uncertainty and fixed seed for a reproducible light curve
    errorval = 15.
    np.random.seed(40)
    lc = generate_lightcurve(errorval, a1, a2, period1, period2, quad, cube)

    detrended = custom_detrending(lc)

    analysed = estimate_detrended_noise(detrended,
                                        mask_pos_outliers_sigma=2.5,
                                        std_window=100)
    analysed = find_iterative_median(analysed)

    flares = analysed.find_flares(addtail=True).flares
    print(flares)

    # the estimated noise must match the injected one to within +-2
    assert np.nanmedian(analysed.detrended_flux_err) == pytest.approx(
        errorval, abs=2)

    # reference start/stop indices of the three expected flares
    index_columns = ["istart", "istop"]
    expected = pd.DataFrame({
        'istart': {0: 5280, 1: 13160, 2: 23160},
        'istop': {0: 5346, 1: 13163, 2: 23175},
    })
    assert (flares[index_columns] == expected[index_columns]).all().all()

    # recovered equivalent durations, 20 % tolerance
    expected_ed = np.array([802.25, 4.7907, 40.325])
    assert (flares.ed_rec.values == pytest.approx(expected_ed, rel=0.2))

    # recovered amplitudes, 25 % tolerance
    expected_ampl = np.array([0.28757, 0.03004, 0.064365])
    assert (flares.ampl_rec.values == pytest.approx(expected_ampl, rel=0.25))
def iteratively_remove_sines(flcd,
                             freq_unit=1 / u.day,
                             maximum_frequency=10,
                             minimum_frequency=0.05):
    """Iteratively fit and subtract cosines from each light curve chunk.

    For every chunk returned by ``find_gaps`` a periodogram is computed,
    a cosine is least-squares fitted starting from the frequency of
    maximum power, and the fit is subtracted from the flux (the mean
    flux is added back). This repeats while the maximum power of the
    flattened periodogram exceeds 1, at most 10 times per chunk.
    Progress values are printed to stdout.

    Parameters:
    -----------
    flcd : FlareLightCurve
        light curve; its `detrended_flux` is overwritten chunk by chunk
    freq_unit : astropy.units
        unit passed on to `to_periodogram`
    maximum_frequency : float
        upper frequency limit passed on to `to_periodogram`
    minimum_frequency : float
        lower frequency limit passed on to `to_periodogram`

    Return:
    -------
    The input `flcd` with updated `detrended_flux`.
    """

    def cosine(x, a, b, c, d):
        return a * np.cos(b * x + c) + d

    # identical settings are used for every periodogram below
    periodogram_kwargs = dict(freq_unit=freq_unit,
                              maximum_frequency=maximum_frequency,
                              minimum_frequency=minimum_frequency)

    # work on a copy; only detrended_flux of the input is written to
    working_copy = copy.deepcopy(flcd)

    for start, stop in working_copy.find_gaps().gaps:
        chunk = find_iterative_median(copy.deepcopy(working_copy[start:stop]))

        periodogram = chunk.remove_nans().to_periodogram(**periodogram_kwargs)
        snr = periodogram.flatten().max_power
        print("SNR at ", snr)

        iteration = 0
        while snr > 1. and iteration < 10:
            periodogram = chunk.remove_nans().to_periodogram(
                **periodogram_kwargs)

            # fit only where both time and flux are defined
            valid = ~np.isnan(chunk.time) & ~np.isnan(chunk.flux)
            initial_guess = [
                np.nanstd(chunk.flux),
                2 * np.pi * periodogram.frequency_at_max_power.value,
                0,
                np.nanmean(chunk.flux),
            ]
            params, _ = optimize.curve_fit(cosine,
                                           chunk.time[valid],
                                           chunk.flux[valid],
                                           p0=initial_guess)

            # subtract the fitted cosine, add the mean flux back
            chunk.flux = np.nanmean(chunk.flux) + chunk.flux - cosine(
                chunk.time, params[0], params[1], params[2], params[3])

            print(snr)
            # taken from the periodogram computed before this subtraction
            snr = periodogram.flatten().max_power
            print(snr)
            iteration += 1

        flcd.detrended_flux[start:stop] = chunk.flux

    return flcd
예제 #6
0
def estimate_detrended_noise(
    flc,
    mask_pos_outliers_sigma=2.5,
    std_window=100,
):
    """Estimate the noise of the de-trended flux with a rolling std.

    Works on a deep copy of `flc`, chunk by chunk as given by
    ``find_gaps``. In each chunk, samples rejected by ``sigma_clip`` are
    set to NaN, a centered rolling standard deviation is computed and
    NaN gaps in it are interpolated. This is done twice: a first pass
    provides a provisional `detrended_flux_err` for the iterative
    median, a second pass produces the values stored in the result.

    Parameters:
    -----------
    flc : FlareLightCurve
        light curve with `detrended_flux` and `detrended_flux_err`
    mask_pos_outliers_sigma : float
        passed to ``sigma_clip`` as `max_sigma`
    std_window : int
        window length of the rolling standard deviation

    Return:
    -------
    Copy of `flc` with updated `detrended_flux_err`.
    """

    def rolling_std(values):
        # centered rolling std; NaN stretches are interpolated
        series = pd.Series(values)
        windows = series.rolling(std_window, center=True, min_periods=1)
        return windows.std().interpolate()

    result = copy.deepcopy(flc).find_gaps()

    for start, stop in result.gaps:
        chunk = copy.deepcopy(result[start:stop])

        # first pass: blank the samples where the clip mask is False
        keep = sigma_clip(chunk.detrended_flux.value,
                          max_sigma=mask_pos_outliers_sigma,
                          longdecay=2)
        chunk.detrended_flux[~keep] = np.nan
        chunk.detrended_flux_err[:] = rolling_std(chunk.detrended_flux.value)

        # refine the median with the provisional uncertainties
        chunk = find_iterative_median(chunk)

        # second pass on a private copy of the flux values
        cleaned = copy.deepcopy(chunk.detrended_flux.value)
        keep = sigma_clip(cleaned,
                          max_sigma=mask_pos_outliers_sigma,
                          longdecay=2)
        cleaned[~keep] = np.nan

        result.detrended_flux_err[start:stop] = rolling_std(cleaned)

    return result
def custom_detrending(flc):
    """Chain several de-trending steps and estimate the residual noise.

    Steps, in order:

    1. If the first and last finite flux values differ by more than
       20 % of the median flux, fit a coarse spline and use the result.
    2. Remove sinusoidal signals with ``iteratively_remove_sines``.
    3. Subtract a centered rolling median over 300 samples and add the
       overall median back.
    4. Compute the iterative median and drop non-finite flux values.
    5. Estimate the noise with ``refine_detrended_flux_err``.

    Parameters:
    -----------
    flc : FlareLightCurve
        light curve to de-trend; its `flux` is modified in place

    Return:
    -------
    FlareLightCurve as returned by ``refine_detrended_flux_err``.
    """
    finite_flux = flc.flux[np.isfinite(flc.flux)]
    end_to_end_change = np.abs(finite_flux[0] - finite_flux[-1])

    if end_to_end_change / np.median(finite_flux) > .2:
        print("Do a coarse spline interpolation to remove trends.")
        flc = fit_spline(flc, spline_coarseness=12)
        flc.flux[:] = flc.detrended_flux[:]

    # iteratively remove sinusoidal modulation
    flc = iteratively_remove_sines(flc)
    flc.flux[:] = flc.detrended_flux[:]

    # remove slower residual trends with a centered rolling median
    rolling_median = pd.Series(flc.flux).rolling(300, center=True).median()
    flc.flux[:] = flc.flux - rolling_median + np.nanmedian(flc.flux)

    flc = find_iterative_median(flc)

    # The window length was meant for a Savitzky-Golay step that is
    # currently disabled; the call is kept, its result is not used.
    search_gaps_for_window_length(flc)

    flc = flc[np.isfinite(flc.flux)]

    # rolling-std noise estimate with padding around masked outliers
    return refine_detrended_flux_err(flc,
                                     mask_pos_outliers_sigma=1.5,
                                     std_rolling_window_length=15,
                                     pad=25)
예제 #8
0
def remove_sines_iteratively(flcd,
                             niter=5,
                             freq_unit=1 / u.day,
                             maximum_frequency=12.,
                             minimum_frequency=0.2,
                             max_sigma=3.5,
                             longdecay=2):
    """Iteratively remove strong sinusoidal signal
    from light curve. Each iteration calculates a Lomb-Scargle
    periodogram and LSQ-fits a cosine function using the dominant
    frequency as starting point.

    Chunks (as returned by `find_gaps`) that are not longer than one
    full period at `maximum_frequency` are skipped; their
    `detrended_flux` is left untouched.

    Parameters:
    ------------
    flcd : FlareLightCurve
        light curve from which to remove the sinusoidal signal;
        its `detrended_flux` is overwritten in place
    niter : int
        Maximum number of iterations.
    freq_unit : astropy.units
        unit in which maximum_frequency and minimum_frequency
        are given
    maximum_frequency: float
        highest frequency to calculate the Lomb-Scargle periodogram
    minimum_frequency: float
        lowest frequency to calculate the Lomb-Scargle periodogram
    max_sigma : float
        Passed to altaipony.utils.sigma_clip.
        Above this value data points
        are flagged as outliers.
    longdecay : int
        altaipony.utils.sigma_clip expands the mask for series
        of outliers by sqrt(length of series). Longdecay doubles
        the mask expansion in the decay phase of what may be flares.

    Return:
    -------
    FlareLightCurve with detrended_flux attribute

    """

    # define cosine function
    def cosine(x, a, b, c, d):
        return a * np.cos(b * x + c) + d

    # make a copy of the original LC
    flct = copy.deepcopy(flcd)

    # iterate over chunks
    for le, ri in flct.find_gaps().gaps:

        # again make a copy of the chunk to manipulate safely
        flc = copy.deepcopy(flct[le:ri])

        # find median of LC
        flc = find_iterative_median(flc)

        # mask flares; honour the caller's clipping settings instead of
        # hard-coded values (the defaults reproduce the old behaviour).
        # The mask is computed once, before any sine is subtracted.
        mask = sigma_clip(flc.flux.value,
                          max_sigma=max_sigma,
                          longdecay=longdecay)

        # how many data points comprise the fastest period at maximum_frequency?
        full_fastest_period = 1. / maximum_frequency / np.nanmin(
            np.diff(flc.remove_nans().time.value))

        # only remove sines if LC chunk is larger than one full period of the fastest frequency
        if flc.flux.value.shape[0] > full_fastest_period:

            n = 0  # start counter
            snr = 3  # go into while loop at least once

            # iterate while there is signal, but not more than niter times
            while (snr > 1) and (n < niter):
                # mask NaNs and outliers
                cond = np.invert(np.isnan(flc.time.value)) & np.invert(
                    np.isnan(flc.flux.value)) & mask

                # calculate periodogram
                pg = flc[cond].to_periodogram(
                    freq_unit=freq_unit,
                    maximum_frequency=maximum_frequency,
                    minimum_frequency=minimum_frequency)

                # fit sinusoidal
                p, _ = optimize.curve_fit(
                    cosine,
                    flc.time.value[cond],
                    flc.flux.value[cond],
                    p0=[
                        np.nanstd(flc.flux.value),
                        2 * np.pi * pg.frequency_at_max_power.value, 0,
                        np.nanmean(flc.flux.value)
                    ],
                    ftol=1e-6)

                # replace with de-trended flux but without subtracting the
                # median: the fitted offset p[3] is replaced by 0
                flc.flux = flc.flux.value - cosine(flc.time.value, p[0], p[1],
                                                   p[2], 0.)

                # update SNR (from the periodogram computed before this
                # iteration's subtraction)
                snr = pg.flatten().max_power

                # bump iterator
                n += 1

            # replace the empty array with the fitted detrended flux
            flcd.detrended_flux[le:ri] = flc.flux.value

    return flcd
예제 #9
0
def custom_detrending(lc,
                      spline_coarseness=30,
                      spline_order=3,
                      savgol1=6.,
                      savgol2=3.,
                      pad=6,
                      max_sigma=2.5,
                      remove_exp_fringe=True):
    """Custom de-trending for TESS and Kepler
    short cadence light curves, including TESS Cycle 3 20s
    cadence.

    The light curve is normalized, then de-trended with a coarse
    spline, iterative sine removal, two Savitzky-Golay passes and,
    optionally, an exponential fringe removal. Every intermediate
    result is drawn, vertically offset, into a diagnostic plot.

    Parameters:
    ------------
    lc : FlareLightCurve
        light curve that has at least time, flux and flux_err
    spline_coarseness : float
        time scale in hours for spline points.
        See fit_spline for details.
    spline_order: int
        Spline order for the coarse spline fit.
        Default is cubic spline.
    savgol1 : float
        Window size for first Savitzky-Golay filter application.
        Unit is hours, defaults to 6 hours.
    savgol2 : float
        Window size for second Savitzky-Golay filter application.
        Unit is hours, defaults to 3 hours.
    pad : int
        Outliers in Savitzky-Golay filter are padded with this
        number of data points. Defaults to 6. Forwarded to
        detrend_savgol as its `longdecay` argument.
    max_sigma : float
        sigma value at which to cap outliers and flare candidates.
        Default is 2.5. Choose 1.5 for very active stars.
    remove_exp_fringe : bool
        removes un-detrended fringes in the light curve with an exponential
        function. Default is True.


    Return:
    -------
    lc5 : FlareLightCurve
        NaN-free light curve with detrended_flux attribute
    ax : matplotlib Axes
        axes holding the diagnostic plot of all de-trending stages
    """

    # mean cadence of the input light curve, in the units of lc.time
    dt = np.mean(np.diff(lc.time.value))

    def odd_window(hours):
        # Convert a window size in hours into an odd number of data
        # points (assumes lc.time is in days -- TODO confirm).
        return int((np.rint(hours / 24. / dt) // 2) * 2 + 1)

    # diag plot init; draw on `ax` explicitly so that the returned axes
    # always holds the diagnostics, whatever pyplot's current axes is
    _, ax = plt.subplots(nrows=1, ncols=1, figsize=(16, 8))

    lc = lc.normalize()
    # vertical spacing between the stacked diagnostic curves
    offset = (np.mean(lc.flux.value) - np.min(lc.flux.value))
    ax.plot(lc.time.value, lc.flux.value, c="c", label="original light curve")

    # fit a spline to the general trends
    lc1, model = fit_spline(lc,
                            spline_order=spline_order,
                            spline_coarseness=spline_coarseness)

    # replace for next step
    lc1.flux = lc1.detrended_flux.value

    # diag plot
    ax.plot(lc1.time.value, model, c="r", label="rough trends")
    ax.plot(lc1.time.value,
            lc1.detrended_flux.value + 1 * offset,
            c="yellow",
            label="rough trends removed")

    # remove strong and fast sinusoidal variability, using the default
    # settings of remove_sines_iteratively
    lc2 = remove_sines_iteratively(lc1)

    # diag plot
    ax.plot(lc2.time.value,
            lc2.detrended_flux.value + 2 * offset,
            c="grey",
            label="sines removed")

    # first Savitzky-Golay pass with a window of `savgol1` hours
    lc3 = detrend_savgol(lc2,
                         max_sigma=max_sigma,
                         longdecay=pad,
                         w=odd_window(savgol1))

    # second Savitzky-Golay pass with a window of `savgol2` hours
    lc4 = detrend_savgol(lc3,
                         max_sigma=max_sigma,
                         longdecay=pad,
                         w=odd_window(savgol2))

    # diag plot
    ax.plot(lc4.time.value,
            lc4.flux.value + 3 * offset,
            c="k",
            label="SavGol applied")

    # find median value
    lc4.detrended_flux = lc4.flux
    lc4.detrended_flux_err = lc4.flux_err
    lc4 = find_iterative_median(lc4)

    # remove exponential fringes that neither spline,
    # nor sines, nor SavGol can remove.
    if remove_exp_fringe:
        lc5 = remove_exponential_fringes(lc4.remove_nans())
    else:
        lc5 = lc4.remove_nans()

    ax.plot(lc5.time.value,
            lc5.detrended_flux.value + 4 * offset,
            c="magenta",
            label="expfunc applied")

    return lc5, ax