Example #1
def construct_KpVsys(rv, ccf, ccf_e, dp, kprange=[0, 300], dkp=1.0):
    """The name says it all. Do good tests."""
    import tayph.functions as fun
    import numpy as np
    import tayph.system_parameters as sp
    from joblib import Parallel, delayed

    Kp = fun.findgen((kprange[1] - kprange[0]) / dkp + 1) * dkp + kprange[0]
    transit = sp.transit(dp) - 1.0  #Zero out of transit, negative in transit.
    transit /= np.nansum(transit)  #Normalise so that the weights sum to 1.
    transitblock = fun.rebinreform(transit, len(rv)).T

    def Kp_parallel(i):
        dRV = sp.RV(dp, vorb=i) * (-1.0)  #Velocities that shift the CCFs into the planet rest frame.
        ccf_shifted = shift_ccf(rv, ccf, dRV)
        ccf_e_shifted = shift_ccf(rv, ccf_e, dRV)
        #Co-add the exposures with transit weighting, propagating the errors in quadrature.
        return (np.nansum(transitblock * ccf_shifted, axis=0),
                (np.nansum((transitblock * ccf_e_shifted)**2.0, axis=0))**0.5)

    KpVsys, KpVsys_e = zip(*Parallel(n_jobs=-1, verbose=5)(
        delayed(Kp_parallel)(i) for i in Kp))

    return (Kp, np.array(KpVsys), np.array(KpVsys_e))  #Cast the tuples of rows to 2D arrays.
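
A minimal usage sketch for this parallel version, assuming rv, ccf and ccf_e already exist as numpy arrays and that the data path points at a tayph dataset; the shapes and the path below are hypothetical.

#Hypothetical inputs: rv has shape (n_rv,), ccf and ccf_e have shape (n_exp, n_rv),
#and 'data/WASP-121/night1/' stands in for a real tayph data path.
Kp, KpVsys, KpVsys_e = construct_KpVsys(rv, ccf, ccf_e, 'data/WASP-121/night1/',
                                        kprange=[0, 300], dkp=1.0)
#KpVsys has shape (len(Kp), len(rv)) and can be displayed with e.g.
#plt.pcolormesh(rv, Kp, KpVsys).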
Example #2
def construct_KpVsys(rv, ccf, ccf_e, dp, kprange=[0, 300], dkp=1.0):
    """The name says it all. Do good tests."""
    import tayph.functions as fun
    import numpy as np
    import tayph.system_parameters as sp
    import tayph.util as ut
    Kp = fun.findgen((kprange[1] - kprange[0]) / dkp + 1) * dkp + kprange[0]
    KpVsys = np.zeros((len(Kp), len(rv)))
    KpVsys_e = np.zeros((len(Kp), len(rv)))
    transit = sp.transit(dp) - 1.0  #Zero out of transit, negative in transit.
    transit /= np.nansum(transit)  #Normalise so that the weights sum to 1.
    transitblock = fun.rebinreform(transit, len(rv)).T

    for j, i in enumerate(Kp):
        dRV = sp.RV(dp, vorb=i) * (-1.0)  #Velocities that shift the CCFs into the planet rest frame.
        ccf_shifted = shift_ccf(rv, ccf, dRV)
        ccf_e_shifted = shift_ccf(rv, ccf_e, dRV)
        KpVsys[j, :] = np.nansum(transitblock * ccf_shifted, axis=0)
        KpVsys_e[j, :] = (np.nansum((transitblock * ccf_e_shifted)**2.0, axis=0))**0.5
        ut.statusbar(i, Kp)
    return (Kp, KpVsys, KpVsys_e)
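
Both versions of construct_KpVsys above call shift_ccf(), which is defined elsewhere in the module and not shown in these examples. Below is a minimal sketch of what such a helper could look like, assuming it linearly interpolates each exposure (row) of the CCF onto the velocity axis shifted by that exposure's dRV; the real implementation may differ.

def shift_ccf(rv, ccf, dRV):
    """Minimal sketch of the shift_ccf helper assumed by construct_KpVsys:
    shift row i of ccf by velocity dRV[i] via linear interpolation onto the
    common rv axis. Hypothetical; the actual implementation may differ."""
    import numpy as np
    ccf_shifted = np.zeros_like(ccf, dtype=float)
    for i in range(ccf.shape[0]):
        #Values shifted beyond the edges of the rv axis become NaN, so that
        #np.nansum ignores them when the exposures are co-added.
        ccf_shifted[i] = np.interp(rv, rv - dRV[i], ccf[i], left=np.nan, right=np.nan)
    return ccf_shifted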
Example #3
def mask_orders(list_of_wls, list_of_orders, dp, maskname, w, c_thresh, manual=False):
    """
    This code takes the list of orders and masks out bad pixels.
    It combines two steps: a sigma-clipping step and a manual step in which the
    user interactively identifies bad pixels in each order. The sigma clipping
    is done on a threshold of c_thresh, using a running median absolute
    deviation (MAD) with a width of w pixels. Manual masking is handled by a
    separate routine that provides an interactive GUI.

    If c_thresh is set to zero, sigma clipping is skipped. If manual=False, the
    manual selection of masking regions (which is manual labour) is turned off.
    If both are turned off, the list_of_orders is returned unchanged.

    If either or both are active, the routine outputs 1 or 2 FITS files that
    contain a stack (cube) of the masks for each order. The first file is the mask
    that was computed automatically, the second is the mask that was constructed
    manually. This is done so that the manual mask can be transplanted onto another
    dataset, or saved under a different file name, to limit repetition of work.

    At the end of the routine, the masks are written to file; merging them and
    applying them to the orders happens in a separate step.
    """
    import tayph.operations as ops
    import numpy as np
    import tayph.functions as fun
    import tayph.util as ut
    import warnings
    from tayph.vartests import typetest, dimtest, postest
    ut.check_path(dp)
    typetest(maskname,str,'maskname in mask_orders()')
    typetest(w,[int,float],'w in mask_orders()')
    typetest(c_thresh,[int,float],'c_thresh in mask_orders()')
    postest(w,'w in mask_orders()')
    postest(c_thresh,'c_thresh in mask_orders()')
    typetest(list_of_wls,list,'list_of_wls in mask_orders()')
    typetest(list_of_orders,list,'list_of_orders in mask_orders()')
    typetest(manual,bool,'manual keyword in mask_orders()')
    dimtest(list_of_wls,[0,0],'list_of_wls in mask_orders()')
    dimtest(list_of_orders,[len(list_of_wls),0,0],'list_of_orders in mask_orders()')

    if c_thresh <= 0 and not manual:
        print('---WARNING in mask_orders: c_thresh is set to zero and manual masking is turned off.')
        print('---Returning orders unmasked.')
        return list_of_orders

    N = len(list_of_orders)
    void = fun.findgen(N)

    list_of_orders = ops.normalize_orders(list_of_orders, list_of_orders)[0]  #First normalise. We don't want outliers to
    #affect the colour correction later on, so colour correction can't be done before masking. This means
    #normalisation needs to be done twice, as colour correction is also needed for proper masking. The second
    #argument is a dummy to replace the expected list_of_sigmas input.
    N_NaN = 0
    list_of_masks = []

    if c_thresh > 0:  #Check that c_thresh is positive. If not, skip sigma clipping.
        print('------Sigma-clipping mask')
        for i in range(N):
            order = list_of_orders[i]
            N_exp = np.shape(order)[0]
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=RuntimeWarning)
                meanspec = np.nanmean(order, axis=0)
            meanblock = fun.rebinreform(meanspec, N_exp)
            res = order / meanblock - 1.0  #Residuals after removal of the mean spectrum.
            sigma = fun.running_MAD_2D(res, w)  #Running MAD in a window of w pixels.
            with np.errstate(invalid='ignore'):  #https://stackoverflow.com/questions/25345843/inequality-comparison-of-numpy-array-with-nan-to-a-scalar
                sel = np.abs(res) >= c_thresh * sigma
                N_NaN += np.sum(sel)  #True counts as 1 and False as 0, so this sums the number of clipped pixels.
                order[sel] = np.nan
            list_of_masks.append(order * 0.0)  #Zero everywhere, NaN at the masked pixels.
            ut.statusbar(i, void)

        print(f'------{N_NaN} outliers identified and set to NaN ({round(N_NaN/np.size(list_of_masks)*100.0,3)}% of all pixels).')
    else:
        print('------Skipping sigma-clipping (c_thresh <= 0)')
        #Do nothing to list_of_masks. It is now an empty list.
        #We now automatically proceed to manual masking, because at this point
        #it has already been established that it must have been turned on.


    list_of_masks_manual = []
    if manual:
        previous_list_of_masked_columns = load_columns_from_file(dp,maskname,mode='relaxed')
        list_of_masked_columns = manual_masking(list_of_wls,list_of_orders,list_of_masks,saved = previous_list_of_masked_columns)
        print('------Successfully concluded manual mask.')
        write_columns_to_file(dp,maskname,list_of_masked_columns)

        print('------Building manual mask from selected columns')
        for i in range(N):
            order = list_of_orders[i]
            N_exp = np.shape(order)[0]
            N_px = np.shape(order)[1]
            list_of_masks_manual.append(np.zeros((N_exp,N_px)))
            for j in list_of_masked_columns[i]:
                list_of_masks_manual[i][:,int(j)] = np.nan

    #We write 1 or 2 mask files here. The list of manual masks
    #and list_of_masks (auto) are either filled, or either is an empty list if
    #c_thresh was set to zero or manual was set to False (because they were defined
    #as empty lists initially, and then not filled with anything).
    write_mask_to_file(dp,maskname,list_of_masks,list_of_masks_manual)
    return 0
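
The automatic step above relies on fun.running_MAD_2D, which is not shown here. The following is a self-contained sketch of the same clipping logic, with a simple column-wise running MAD written directly in numpy; the helper names are hypothetical and the edge handling is simplified.

import numpy as np

def running_mad_sketch(res, w):
    """Hypothetical stand-in for fun.running_MAD_2D: the MAD of the residuals
    in a window of w pixels around each column, scaled to approximate a
    standard deviation (1.4826 * MAD for Gaussian noise)."""
    n_px = res.shape[1]
    sigma = np.zeros(n_px)
    for k in range(n_px):
        lo, hi = max(0, k - w // 2), min(n_px, k + w // 2 + 1)
        chunk = res[:, lo:hi]
        sigma[k] = 1.4826 * np.nanmedian(np.abs(chunk - np.nanmedian(chunk)))
    return sigma  #One sigma value per column.

def sigma_clip_order(order, w=50, c_thresh=5.0):
    """Clip pixels deviating by more than c_thresh running-MAD sigmas from the
    mean spectrum, mirroring the automatic step of mask_orders()."""
    meanspec = np.nanmean(order, axis=0)
    res = order / meanspec - 1.0  #Residuals after removal of the mean spectrum.
    sigma = running_mad_sketch(res, w)
    order = order.copy()
    with np.errstate(invalid='ignore'):  #NaN comparisons evaluate to False.
        order[np.abs(res) >= c_thresh * sigma] = np.nan
    return order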
Example #4
def normalize_orders(list_of_orders, list_of_sigmas, deg=1, nsigma=4):
    """
    If deg is set to 1, this function will normalise based on the mean flux in each order.
    If set higher, it will remove the average spectrum in each order and fit a polynomial
    to the residual. This means that in the presence of spectral lines, the fluxes will be
    slightly lower than if deg=1 is used. nsigma is only used if deg > 1, and is used to
    throw away outliers from the polynomial fit. The program also computes the total
    mean flux of each exposure in the time series - totalled over all orders. These
    are important to correctly weigh the cross-correlation functions later. The
    inter-order colour correction is assumed to be an insignificant modification to
    these weights.

    Parameters
    ----------
    list_of_orders : list
        The list of 2D orders that need to be normalised.

    list_of_sigmas : list
        The list of 2D error matrices corresponding to the 2D orders that need to be normalised.

    deg : int
        The polynomial degree to remove. If set to 1, only the average flux is removed. If higher,
        polynomial fits are made to the residuals after removal of the average spectrum.

    nsigma : int, float
        The number of sigmas beyond which outliers are rejected from the polynomial fit.
        Only used when deg > 1.

    Returns
    -------
    out_list_of_orders : list
        The normalised 2D orders.
    out_list_of_sigmas : list
        The corresponding errors.
    meanfluxes : np.array
        The mean flux of each exposure in the time series, averaged over all orders.
    """
    import numpy as np
    import tayph.functions as fun
    from tayph.vartests import dimtest, postest, typetest
    import warnings
    typetest(list_of_orders, list, 'list_of_orders in ops.normalize_orders()')
    typetest(list_of_sigmas, list, 'list_of_sigmas in ops.normalize_orders()')

    dimtest(list_of_orders[0], [0, 0])  #Test that the first order is 2D.
    dimtest(list_of_sigmas[0], [0, 0])  #And that the corresponding sigma array is, as well.
    n_exp = np.shape(list_of_orders[0])[0]  #Get the number of exposures.
    for i in range(len(list_of_orders)):  #Should be the same for all orders.
        dimtest(list_of_orders[i], [n_exp, 0])
        dimtest(list_of_sigmas[i], np.shape(list_of_orders[i]))
    typetest(deg, int, 'degree in ops.normalize_orders()')
    typetest(nsigma, [int, float], 'nsigma in ops.normalize_orders()')
    postest(deg, 'degree in ops.normalize_orders()')
    postest(nsigma, 'nsigma in ops.normalize_orders()')

    N = len(list_of_orders)
    out_list_of_orders = []
    out_list_of_sigmas = []

    #First compute the exposure-to-exposure flux variations to be used as weights.
    meanfluxes = np.zeros(n_exp)
    N_i = 0
    for i in range(N):
        m = np.nanmedian(list_of_orders[i], axis=1)  #Median or mean?
        if np.sum(np.isnan(m)) > 0:
            print('---Warning in normalise_orders: Skipping order %s because many NaNs are present.' % i)
        else:
            N_i += 1
            meanfluxes += m  #These contain the exposure-to-exposure variability of the time-series.
    meanfluxes /= N_i  #These are the weights.

    if deg == 1:
        for i in range(N):
            #What I'm doing here is probably stupid and numpy division will probably
            #work just fine without IDL-relics.
            n_px = np.shape(list_of_orders[i])[1]
            meanflux = np.nanmedian(list_of_orders[i], axis=1)  #Average flux in each order. Median or mean?
            meanblock = fun.rebinreform(meanflux / np.nanmean(meanflux), n_px).T  #This is a slow operation.
            #Row-by-row division is better done using a double-transpose...
            out_list_of_orders.append(list_of_orders[i] / meanblock)
            out_list_of_sigmas.append(list_of_sigmas[i] / meanblock)
    else:
        for i in range(N):
            with warnings.catch_warnings():
                warnings.simplefilter("ignore", category=RuntimeWarning)
                meanspec = np.nanmean(list_of_orders[i], axis=0)  #Average spectrum in each order.
            x = np.array(range(len(meanspec)))
            poly_block = list_of_orders[i] * 0.0  #Array that will host the polynomial fits.
            colour = list_of_orders[i] / meanspec  #What if there are zeroes? I.e. padding around the edges of the order?
            for j, s in enumerate(list_of_orders[i]):
                idx = np.isfinite(colour[j])
                if np.sum(idx) > 0:
                    p = np.poly1d(np.polyfit(x[idx], colour[j][idx], deg))(x)  #Polynomial fit to the colour variation.
                    res = colour[j] / p - 1.0  #The residual, which is flat around zero if it's a good fit.
                    #This has all sorts of line residuals that we need to throw out. We do that using
                    #the weight keyword of polyfit, setting the weights of the outliers to zero.
                    sigma = np.nanstd(res)
                    w = x * 0.0 + 1.0  #Start with a weight function that is 1.0 everywhere.
                    with warnings.catch_warnings():
                        warnings.simplefilter("ignore", category=RuntimeWarning)
                        w[np.abs(res) > nsigma * sigma] = 0.0  #Zero the weights of the outliers.
                    p2 = np.poly1d(np.polyfit(x[idx], colour[j][idx], deg, w=w[idx]))(x)  #Second, weighted polynomial fit.
                    poly_block[j] = p2

            out_list_of_orders.append(list_of_orders[i] / poly_block)
            out_list_of_sigmas.append(list_of_sigmas[i] / poly_block)
    return (out_list_of_orders, out_list_of_sigmas, meanfluxes)
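
A minimal usage sketch with synthetic data, to illustrate the expected input and output shapes; all values below are made up, and the sketch assumes the function above is available as ops.normalize_orders.

import numpy as np
import tayph.operations as ops

n_exp, n_px = 20, 4096
rng = np.random.default_rng(0)
#Two synthetic orders with a 10% throughput trend across the exposures.
orders = [rng.normal(1000.0, 10.0, (n_exp, n_px)) * np.linspace(0.9, 1.1, n_exp)[:, None]
          for _ in range(2)]
sigmas = [np.sqrt(np.abs(o)) for o in orders]  #Photon-noise-like errors.

orders_n, sigmas_n, meanfluxes = ops.normalize_orders(orders, sigmas, deg=1)
print(len(orders_n), orders_n[0].shape, meanfluxes.shape)  #2 (20, 4096) (20,)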
Example #5
def clean_ccf(rv, ccf, ccf_e, dp):
    """
    This routine normalizes the CCF fluxes and subtracts the average out of
    transit CCF, using the transit lightcurve as a mask.


    Parameters
    ----------

    rv : np.ndarray
        The radial velocity axis

    ccf : np.ndarray
        The CCF with second dimension matching the length of rv.

    ccf_e : np.ndarray
        The error on ccf.

    dp : str or path-like
        The datapath of the present dataset, to establish which exposures in ccf
        are in or out of transit.

    Returns
    -------

    ccf_n : np.ndarray
        The transit-lightcurve normalised CCF.

    ccf_ne : np.ndarray
        The error on ccf_n

    ccf_nn : np.ndarray
        The CCF divided by the time-average of the out-of-transit exposures, minus
        1.0, if sufficiently many (>25% of the time-series) out-of-transit
        exposures were available. Otherwise, the average over the entire
        time-series is used.

    ccf_nne : np.ndarray
        The error on ccf_nn.


    """

    import numpy as np
    import tayph.functions as fun
    import tayph.util as ut
    import tayph.system_parameters as sp
    from tayph.vartests import typetest, dimtest, nantest

    typetest(rv, np.ndarray, 'rv in clean_ccf()')
    typetest(ccf, np.ndarray, 'ccf in clean_ccf')
    typetest(ccf_e, np.ndarray, 'ccf_e in clean_ccf')
    dp = ut.check_path(dp)
    dimtest(ccf, [0, len(rv)])
    dimtest(ccf_e, [0, len(rv)])
    nantest(rv, 'rv in clean_ccf()')
    nantest(ccf, 'ccf in clean_ccf()')
    nantest(ccf_e, 'ccf_e in clean_ccf()')
    #ADD PARAMGET DV HERE.

    transit = sp.transit(dp)
    # transitblock = fun.rebinreform(transit,len(rv))

    Nrv = len(rv)  #Number of velocity bins.

    #Use the outer 25% of the velocity range on each side as the baseline, away from the planet signal.
    baseline_ccf = np.hstack((ccf[:, 0:int(0.25 * Nrv)], ccf[:, int(0.75 * Nrv):]))
    baseline_ccf_e = np.hstack((ccf_e[:, 0:int(0.25 * Nrv)], ccf_e[:, int(0.75 * Nrv):]))
    baseline_rv = np.hstack((rv[0:int(0.25 * Nrv)], rv[int(0.75 * Nrv):]))
    meanflux = np.median(baseline_ccf, axis=1)  #Normalize the baseline flux, away from the signal of the planet.
    meanflux_e = 1.0 / len(baseline_rv) * np.sqrt(np.nansum(baseline_ccf_e**2.0, axis=1))  #1/N times the root of the sum of squares.
    #I validated that this is approximately equal to ccf_e/sqrt(N).
    meanblock = fun.rebinreform(meanflux, len(rv))
    meanblock_e = fun.rebinreform(meanflux_e, len(rv))

    ccf_n = ccf / meanblock.T
    ccf_ne = np.abs(ccf_n) * np.sqrt((ccf_e / ccf)**2.0 + (meanblock_e.T / meanblock.T)**2.0)  #R=X/Z -> dR = R*sqrt( (dX/X)^2+(dZ/Z)^2 )
    #I validated that this is essentially equal to ccf_e/meanblock.T, as expected because the error on the mean spectrum is small compared to ccf_e.

    if np.sum(transit == 1) == 0:
        print('------WARNING in Cleaning: The data contains only in-transit exposures.')
        print('------The mean ccf is taken over the entire time-series.')
        meanccf = np.nanmean(ccf_n, axis=0)
        meanccf_e = 1.0 / len(transit) * np.sqrt(np.nansum(ccf_ne**2.0, axis=0))
        #I validated that this is approximately equal to ccf_ne/sqrt(N), where N is the number of exposures averaged over.
    elif np.sum(transit == 1) <= 0.25 * len(transit):
        print('------WARNING in Cleaning: The data contains very few (<25%) out-of-transit exposures.')
        print('------The mean ccf is taken over the entire time-series.')
        meanccf = np.nanmean(ccf_n, axis=0)
        meanccf_e = 1.0 / len(transit) * np.sqrt(np.nansum(ccf_ne**2.0, axis=0))
    elif np.min(transit) == 1.0:
        print('------WARNING in Cleaning: The data is not predicted to contain in-transit exposures.')
        print(f'------If you expect to be dealing with transit-data, please check the ephemeris at {dp}')
        print('------The mean ccf is taken over the entire time-series.')
        meanccf = np.nanmean(ccf_n, axis=0)
        meanccf_e = 1.0 / len(transit) * np.sqrt(np.nansum(ccf_ne**2.0, axis=0))
    else:
        meanccf = np.nanmean(ccf_n[transit == 1.0, :], axis=0)
        meanccf_e = 1.0 / np.sum(transit == 1) * np.sqrt(np.nansum(ccf_ne[transit == 1.0, :]**2.0, axis=0))
        #I validated that this is approximately equal to ccf_ne/sqrt(N), where N is the number of out-of-transit exposures.

    meanblock2 = fun.rebinreform(meanccf, len(meanflux))
    meanblock2_e = fun.rebinreform(meanccf_e, len(meanflux))

    ccf_nn = ccf_n / meanblock2  #MAY NEED TO DO SUBTRACTION INSTEAD, TOGETHER W. NORMALIZATION OF LIGHTCURVE. SEE ABOVE.
    ccf_nne = np.abs(ccf_n / meanblock2) * np.sqrt((ccf_ne / ccf_n)**2.0 + (meanblock2_e / meanblock2)**2.0)
    #I validated that this error is almost equal to ccf_ne/meanccf.

    #ONLY WORKS IF LIGHTCURVE MODEL IS ACCURATE, i.e. if Euler observations are available.
    # print("---> WARNING IN CLEANING.CLEAN_CCF(): NEED TO ADD A FUNCTION THAT YOU CAN NORMALIZE BY THE LIGHTCURVE AND SUBTRACT INSTEAD OF DIVISION!")
    return (ccf_n, ccf_ne, ccf_nn - 1.0, ccf_nne)
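
A minimal usage sketch, assuming rv, ccf and ccf_e already exist as numpy arrays; the path below is hypothetical.

#Hypothetical inputs: rv has shape (n_rv,), ccf and ccf_e have shape (n_exp, n_rv).
ccf_n, ccf_ne, ccf_nn, ccf_nne = clean_ccf(rv, ccf, ccf_e, 'data/WASP-121/night1/')
#ccf_nn is centred on zero away from the planet signal; it can be passed on to
#construct_KpVsys() together with ccf_nne.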