Python sigmaClip Examples, dave.misc.noise.sigmaClip Python Examples

Example #1

0

Show file

File: fitData.py Project: barentsen/dave

def getNoiseParams(lcv):
    """Sigma clip a light curve and compute its three noise metrics.

    Inputs:
    ----------
    lcv
        Light curve to analyse. It is indexed with the inverted result of
        noise.sigmaClip, so it must support boolean-mask indexing
        (presumably a 1d numpy array -- confirm against callers).

    Returns:
    ----------
    A tuple (rta, sgcdpp, scatter) holding the results of
    noise.computeRollTweakAmplitude, noise.computeSgCdpp_ppm and
    noise.estimateScatterWithMarshallMethod, all evaluated on the clipped
    light curve.
    """
    # Points flagged by sigmaClip are discarded; 5.0 is the clip threshold
    # handed to sigmaClip (presumably a number of sigma).
    outlier_tf = noise.sigmaClip(lcv, 5.0)
    clipped = lcv[~outlier_tf]

    return (
        noise.computeRollTweakAmplitude(clipped),
        noise.computeSgCdpp_ppm(clipped),
        noise.estimateScatterWithMarshallMethod(clipped),
    )

Example #2

0

Show file

File: fitData.py Project: CheerfulUser/SynDiff

def getNoiseParams(lcv):
    """Return (rta, sgcdpp, scatter) for a sigma-clipped light curve.

    Inputs:
    ----------
    lcv
        Light curve to analyse. The inverted output of noise.sigmaClip is
        used to index it, so it must accept a boolean mask (presumably a
        1d numpy array -- confirm against callers).

    Returns:
    ----------
    rta, sgcdpp, scatter
        Outputs of noise.computeRollTweakAmplitude,
        noise.computeSgCdpp_ppm and noise.estimateScatterWithMarshallMethod
        for the light curve with the flagged points removed.
    """
    # Keep only the points that sigmaClip did not flag (threshold 5.)
    keep_tf = ~noise.sigmaClip(lcv, 5.)
    kept = lcv[keep_tf]

    # The three metrics are evaluated in the same order they are returned
    rollTweak = noise.computeRollTweakAmplitude(kept)
    cdpp = noise.computeSgCdpp_ppm(kept)
    marshallScatter = noise.estimateScatterWithMarshallMethod(kept)
    return rollTweak, cdpp, marshallScatter

Example #3

0

Show file

File: fitData.py Project: barentsen/dave

def paramSubplot(cadenceSum, numPrinCompList, results_list):
    """Plot RTA, sgcdpp and scatter against the number of principal components.

    Three stacked panels share one x axis. Each panel shows one noise metric
    as green dots (one per entry of numPrinCompList) together with a
    horizontal line at the value of that metric for the raw light curve.
    The figure is displayed with plt.show(); nothing is returned.

    Inputs:
    ----------
    cadenceSum
        Raw light curve. It is divided by its mean and shifted by -1 before
        the raw metrics are computed. Must support numpy arithmetic and
        boolean-mask indexing (presumably a 1d numpy array).
    numPrinCompList
        x values for the dots in all three panels.
    results_list
        Object with a ``.T`` attribute whose rows 1, 2 and 3 are plotted as
        RTA, sgcdpp and scatter respectively (presumably the array returned
        by varyPrinComp -- confirm against callers).
    """
    y = cadenceSum / np.mean(cadenceSum) - 1

    raw_rta = noise.computeRollTweakAmplitude(y)

    # noise.sigmaClip's result is used as a boolean outlier-flag array by the
    # other callers (lcv[~flags]). Handing the flags themselves to
    # computeSgCdpp_ppm would measure the flags rather than the light curve,
    # so apply them to y first.
    outlier_tf = noise.sigmaClip(y, 5.0)
    raw_sgcdpp = noise.computeSgCdpp_ppm(y[~outlier_tf])

    raw_scatter = noise.estimateScatterWithMarshallMethod(y)

    _, axarr = plt.subplots(3, sharex=True)

    # reference lines: metrics of the uncorrected light curve
    axarr[0].axhline(raw_rta, label="RTA for raw light curve")
    axarr[1].axhline(raw_sgcdpp, label="sgcdpp for raw light curve")
    axarr[2].axhline(raw_scatter, label="scatter for raw light curve")

    # metrics after correction, one dot per number of principal components
    axarr[0].plot(numPrinCompList, results_list.T[1], ".", color="green")
    axarr[1].plot(numPrinCompList, results_list.T[2], ".", color="green")
    axarr[2].plot(numPrinCompList, results_list.T[3], ".", color="green")

    plt.xlabel("Number of Principal Components")
    axarr[0].set_ylabel("Roll Tweak Amplitude")
    axarr[1].set_ylabel("sgcdpp")
    axarr[2].set_ylabel("scatter")
    plt.show()

Example #4

0

Show file

File: fitData.py Project: CheerfulUser/SynDiff

def paramSubplot(cadenceSum, numPrinCompList, results_list):
    """Plot the three noise metrics versus number of principal components.

    Creates three vertically stacked panels with a shared x axis: roll tweak
    amplitude, sgcdpp and scatter. In each panel the per-fit values are drawn
    as green dots and the value for the raw light curve as a horizontal
    line. The figure is shown with plt.show(); nothing is returned.

    Inputs:
    ----------
    cadenceSum
        Raw light curve. It is normalised as ``cadenceSum / mean - 1`` before
        the raw metrics are computed. Must support numpy arithmetic and
        boolean-mask indexing (presumably a 1d numpy array).
    numPrinCompList
        x values for the dots in all three panels.
    results_list
        Object with a ``.T`` attribute; rows 1, 2 and 3 of the transpose are
        plotted as RTA, sgcdpp and scatter (presumably the array returned by
        varyPrinComp -- confirm against callers).
    """
    y = cadenceSum / np.mean(cadenceSum) - 1

    raw_rta = noise.computeRollTweakAmplitude(y)

    # The other callers treat noise.sigmaClip's result as boolean outlier
    # flags (lcv[~flags]). Passing the flags straight into computeSgCdpp_ppm
    # would compute the metric on the flags, not the data, so clip y here.
    clipped_y = y[~noise.sigmaClip(y, 5.)]
    raw_sgcdpp = noise.computeSgCdpp_ppm(clipped_y)

    raw_scatter = noise.estimateScatterWithMarshallMethod(y)

    _, axarr = plt.subplots(3, sharex=True)

    # horizontal reference lines for the uncorrected light curve
    axarr[0].axhline(raw_rta, label="RTA for raw light curve")
    axarr[1].axhline(raw_sgcdpp, label="sgcdpp for raw light curve")
    axarr[2].axhline(raw_scatter, label="scatter for raw light curve")

    # corrected-curve metrics, one dot per number of principal components
    axarr[0].plot(numPrinCompList, results_list.T[1], ".", color="green")
    axarr[1].plot(numPrinCompList, results_list.T[2], ".", color="green")
    axarr[2].plot(numPrinCompList, results_list.T[3], ".", color="green")

    plt.xlabel("Number of Principal Components")
    axarr[0].set_ylabel("Roll Tweak Amplitude")
    axarr[1].set_ylabel("sgcdpp")
    axarr[2].set_ylabel("scatter")
    plt.show()

Example #5

0

Show file

File: productionPCA.py Project: barentsen/dave

def chooseNumPrinComps(prinComps, totLightcurve, numPixels):
    """Pick how many principal components to use in the final fit.

    For n = 1, 2, ... (while n is below numPixels / 2) the light curve is
    fit with the first n principal components, the fit is subtracted, the
    residual is divided by its mean and shifted by -1, the points flagged by
    noise.sigmaClip are dropped, and noise.computeSgCdpp_ppm is evaluated on
    what remains. The collected (n, sgcdpp) values are handed to
    chooseBestSGcdppValue, which makes the actual selection (per the original
    author, by finding where the slope between successive sgcdpp values
    "flattens out" -- that logic is not part of this function).

    Inputs:
    ----------
    prinComps
        (2d numpy array) Size: (number of components, time steps)
        Principal components ranked from most to least influential, i.e.
        prinComps[0] is the most dominant component in the light curve.
    totLightcurve
        (1d numpy array) Summed raw light curve, one value per time step.
    numPixels
        (int) the number of pixels per cadence

    Returns:
    ----------
    optimalNumPC
        Whatever chooseBestSGcdppValue returns for the 2 x N array whose
        first row holds the component counts and whose second row holds the
        matching sgcdpp values.
    """
    # Author's note: the optimal number of principal components has always
    # been less than half the number of pixels, so only that range is
    # searched to save computation time.
    maxNumComps = numPixels/2

    # threshold handed to noise.sigmaClip
    clipThreshold = 5.

    trials = []
    numComps = 1
    while numComps < maxNumComps:
        # what is left of the light curve once the fit to the leading
        # numComps components has been removed
        residual = totLightcurve - curveFit(prinComps[:numComps], totLightcurve)

        # bring the mean to zero, needed for the sgcdpp calculation
        normalised = residual / np.mean(residual) - 1

        # discard the points flagged by the sigma clip
        outlier_tf = noise.sigmaClip(normalised, clipThreshold)
        trials.append([numComps, noise.computeSgCdpp_ppm(normalised[~outlier_tf])])

        numComps += 1

    # rows: component counts, sgcdpp values
    return chooseBestSGcdppValue(np.array(trials).T)

Example #6

0

Show file

def chooseNumPrinComps(prinComps, totLightcurve, numPixels):
    """Choose the number of principal components for the final fit.

    Builds a table of sgcdpp values (see noise.computeSgCdpp_ppm), one per
    candidate number of principal components n = 1, 2, ... with
    n < numPixels / 2, and passes that table to chooseBestSGcdppValue.
    According to the original author that helper looks for the point where
    the slope between successive sgcdpp values "flattens out"; its logic is
    not part of this function.

    Inputs:
    ----------
    prinComps
        (2d numpy array) Size: (number of components, time steps)
        Principal components ordered from most to least influential
        (prinComps[0] is the most dominant one).
    totLightcurve
        (1d numpy array) Summed raw light curve; its length should equal
        the number of time steps.
    numPixels
        (int) the number of pixels per cadence

    Returns:
    ----------
    optimalNumPC
        The value returned by chooseBestSGcdppValue for the 2 x N table
        (row 0: numbers of components, row 1: sgcdpp values).
    """
    # Author's note: in practice the best number of components is below half
    # the pixel count, so the search stops there to save computation time.
    searchLimit = numPixels / 2

    # threshold for the sigma clip
    nSigma = 5.

    table = []
    k = 1
    while k < searchLimit:
        # fit with the first k components and subtract the fit
        fit = curveFit(prinComps[:k], totLightcurve)
        detrended = totLightcurve - fit

        # zero-mean version of the detrended curve, required for sgcdpp
        zeroMean = detrended / np.mean(detrended) - 1

        # keep only the points that the sigma clip did not flag
        keep_tf = ~noise.sigmaClip(zeroMean, nSigma)
        sgcdpp = noise.computeSgCdpp_ppm(zeroMean[keep_tf])

        table.append([k, sgcdpp])
        k += 1

    table = np.array(table).T

    return chooseBestSGcdppValue(table)

Example #7

0

Show file

File: fitData.py Project: barentsen/dave

def varyPrinComp(epic, campaign, plotMask):
    """Fit a light curve with varying numbers of principal components.

    Loads the data for one target, builds a light-curve aperture and a PCA
    aperture, and for every even number of principal components from 2 up to
    (but not including) np.sum(mask) fits the summed light curve, normalises
    the corrected curve (divide by its mean, subtract 1), drops the points
    flagged by noise.sigmaClip and records three noise metrics. Diagnostic
    plots are produced along the way via make_plot / matplotlib.

    Inputs:
    ----------
    epic, campaign
        Passed straight to getData and interpolated into the plot titles.
    plotMask
        (bool) Forwarded to createMask as ``plotMask``. When true, the three
        metrics are additionally plotted against the number of principal
        components, with the raw light curve's values as reference lines.

    Returns:
    ----------
    2d numpy array with one column per number of principal components tried
    and rows: number of principal components, rta, sgcdpp, scatter.

    Raises:
    ----------
    ValueError
        From np.argmin when fewer than two numbers of principal components
        were tried, because there is then no slope to minimise.
    """
    # get the data
    getData_return = getData(epic, campaign)
    data = getData_return[0]
    inds_to_eliminate = getData_return[1].astype(bool)

    # create a mask for the light curve
    mask = createMask(data[0], thresh=0, title="Light Curve Mask", plotMask=plotMask)
    lcvMatrix = reduceAperture(mask, data)

    # create a mask for PCA
    pca_mask = createMask(data[0], thresh=0, title="PCA Mask", plotMask=plotMask)
    pcaMatrix = reduceAperture(pca_mask, data)

    # take out data points with thruster firings
    lcvMatrix = lcvMatrix[:, ~inds_to_eliminate]
    pcaMatrix = pcaMatrix[:, ~inds_to_eliminate]

    # get rid of nans in matrices
    lcvMatrix = noMoreNaN(lcvMatrix)
    assert checkNaN(lcvMatrix)
    pcaMatrix = noMoreNaN(pcaMatrix)
    assert checkNaN(pcaMatrix)

    # make the raw light curve
    cadenceSum = np.sum(lcvMatrix, axis=0)

    t = np.arange(len(cadenceSum))

    # plot the raw light curve
    make_plot(t, cadenceSum, title="Raw Light Curve, e: %s, c: %s" % (epic, campaign))

    # make a list of number of principal comps to try
    numPrinCompList = np.arange(2, np.sum(mask), 2)

    rta_list = []
    sgcdpp_list = []
    scatter_list = []
    results_list = []

    for numPrinComps in numPrinCompList:
        fittedCurve = curveFit(pcaMatrix, cadenceSum, numPrinComps)[0]

        # make the mean zero
        corrected = correctedCurve(cadenceSum, fittedCurve)
        corrected /= np.mean(corrected)
        corrected -= 1

        # drop the points flagged by the sigma clip before measuring noise
        sigmaClip_tf = noise.sigmaClip(corrected, 5.0)
        corrected = corrected[~sigmaClip_tf]
        rta = noise.computeRollTweakAmplitude(corrected)
        sgcdpp = noise.computeSgCdpp_ppm(corrected)
        scatter = noise.estimateScatterWithMarshallMethod(corrected)

        rta_list.append(rta)
        sgcdpp_list.append(sgcdpp)
        scatter_list.append(scatter)

        results_list.append([numPrinComps, rta, sgcdpp, scatter])

    # plot the three params wrt the raw values
    if plotMask:
        y = cadenceSum / np.mean(cadenceSum) - 1
        raw_rta = noise.computeRollTweakAmplitude(y)
        # sigmaClip yields boolean outlier flags (see the loop above), so the
        # flags must be applied to y; passing them to computeSgCdpp_ppm
        # directly would measure the flags instead of the light curve.
        raw_sgcdpp = noise.computeSgCdpp_ppm(y[~noise.sigmaClip(y, 5.0)])
        raw_scatter = noise.estimateScatterWithMarshallMethod(y)
        _, axarr = plt.subplots(3, sharex=True)
        axarr[0].axhline(raw_rta, label="RTA for raw light curve")
        axarr[1].axhline(raw_sgcdpp, label="sgcdpp for raw light curve")
        axarr[2].axhline(raw_scatter, label="scatter for raw light curve")
        axarr[0].plot(numPrinCompList, rta_list, ".", color="green")
        axarr[1].plot(numPrinCompList, sgcdpp_list, ".", color="green")
        axarr[2].plot(numPrinCompList, scatter_list, ".", color="green")
        plt.xlabel("Number of Principal Components")
        axarr[0].set_ylabel("Roll Tweak Amplitude")
        axarr[1].set_ylabel("sgcdpp")
        axarr[2].set_ylabel("scatter")
        plt.show()

    # Plot the corrected light curve for the number of principal components
    # with the smallest slope between consecutive sgcdpp values. (Earlier
    # revisions kept disabled alternatives that selected the lowest sgcdpp or
    # the lowest rta via np.argmin on the respective list.)
    smallest_slope = np.argmin(np.abs(np.diff(sgcdpp_list)))
    optimalPC = numPrinCompList[smallest_slope]
    fittedCurve = curveFit(pcaMatrix, cadenceSum, optimalPC)[0]
    optimal_lcv = correctedCurve(cadenceSum, fittedCurve)

    # overplot the points flagged by the sigma clip in red on the same figure
    sigmaClip_tf = noise.sigmaClip(optimal_lcv, 5.0)
    make_plot(t, optimal_lcv, show=False)
    make_plot(
        t[sigmaClip_tf],
        optimal_lcv[sigmaClip_tf],
        new=False,
        title="e:%s, c:%s, smallest slope, PC=%s" % (epic, campaign, optimalPC),
        marker="ro",
    )

    # NOTE(review): this plots t modulo 4.16 against t, not the light curve;
    # the meaning of 4.16 is not evident from this function -- confirm.
    folded = np.fmod(t, 4.16)
    make_plot(t, folded)

    return np.array(results_list).T

Example #8

0

Show file

File: fitData.py Project: CheerfulUser/SynDiff

def varyPrinComp(epic, campaign, plotMask):
    """Measure noise metrics for a range of principal-component counts.

    Loads the data for one target, builds a light-curve aperture and a PCA
    aperture, and for every even number of principal components from 2 up to
    (but not including) np.sum(mask) fits the summed light curve, normalises
    the corrected curve (divide by its mean, subtract 1), drops the points
    flagged by noise.sigmaClip and records three noise metrics. Diagnostic
    plots are produced along the way via make_plot / matplotlib.

    Inputs:
    ----------
    epic, campaign
        Passed straight to getData and interpolated into the plot titles.
    plotMask
        (bool) Forwarded to createMask as ``plotMask``. When true, the three
        metrics are additionally plotted against the number of principal
        components, with the raw light curve's values as reference lines.

    Returns:
    ----------
    2d numpy array with one column per number of principal components tried
    and rows: number of principal components, rta, sgcdpp, scatter.

    Raises:
    ----------
    ValueError
        From np.argmin when fewer than two numbers of principal components
        were tried, because there is then no slope to minimise.
    """
    # get the data
    getData_return = getData(epic, campaign)
    data = getData_return[0]
    inds_to_eliminate = getData_return[1].astype(bool)

    # create a mask for the light curve
    mask = createMask(data[0],
                      thresh=0,
                      title="Light Curve Mask",
                      plotMask=plotMask)
    lcvMatrix = reduceAperture(mask, data)

    # create a mask for PCA
    pca_mask = createMask(data[0],
                          thresh=0,
                          title="PCA Mask",
                          plotMask=plotMask)
    pcaMatrix = reduceAperture(pca_mask, data)

    # take out data points with thruster firings
    lcvMatrix = lcvMatrix[:, ~inds_to_eliminate]
    pcaMatrix = pcaMatrix[:, ~inds_to_eliminate]

    # get rid of nans in matrices
    lcvMatrix = noMoreNaN(lcvMatrix)
    assert checkNaN(lcvMatrix)
    pcaMatrix = noMoreNaN(pcaMatrix)
    assert checkNaN(pcaMatrix)

    # make the raw light curve
    cadenceSum = np.sum(lcvMatrix, axis=0)

    t = np.arange(len(cadenceSum))

    # plot the raw light curve
    make_plot(t,
              cadenceSum,
              title="Raw Light Curve, e: %s, c: %s" % (epic, campaign))

    # make a list of number of principal comps to try
    numPrinCompList = np.arange(2, np.sum(mask), 2)

    rta_list = []
    sgcdpp_list = []
    scatter_list = []
    results_list = []

    for numPrinComps in numPrinCompList:
        fittedCurve = curveFit(pcaMatrix, cadenceSum, numPrinComps)[0]

        # make the mean zero
        corrected = correctedCurve(cadenceSum, fittedCurve)
        corrected /= np.mean(corrected)
        corrected -= 1

        # drop the points flagged by the sigma clip before measuring noise
        sigmaClip_tf = noise.sigmaClip(corrected, 5.)
        corrected = corrected[~sigmaClip_tf]
        rta = noise.computeRollTweakAmplitude(corrected)
        sgcdpp = noise.computeSgCdpp_ppm(corrected)
        scatter = noise.estimateScatterWithMarshallMethod(corrected)

        rta_list.append(rta)
        sgcdpp_list.append(sgcdpp)
        scatter_list.append(scatter)

        results_list.append([numPrinComps, rta, sgcdpp, scatter])

    # plot the three params wrt the raw values
    if plotMask:
        y = cadenceSum / np.mean(cadenceSum) - 1
        raw_rta = noise.computeRollTweakAmplitude(y)
        # sigmaClip yields boolean outlier flags (see the loop above), so the
        # flags must be applied to y; passing them to computeSgCdpp_ppm
        # directly would measure the flags instead of the light curve.
        raw_sgcdpp = noise.computeSgCdpp_ppm(y[~noise.sigmaClip(y, 5.)])
        raw_scatter = noise.estimateScatterWithMarshallMethod(y)
        _, axarr = plt.subplots(3, sharex=True)
        axarr[0].axhline(raw_rta, label="RTA for raw light curve")
        axarr[1].axhline(raw_sgcdpp, label="sgcdpp for raw light curve")
        axarr[2].axhline(raw_scatter, label="scatter for raw light curve")
        axarr[0].plot(numPrinCompList, rta_list, ".", color="green")
        axarr[1].plot(numPrinCompList, sgcdpp_list, ".", color="green")
        axarr[2].plot(numPrinCompList, scatter_list, ".", color="green")
        plt.xlabel("Number of Principal Components")
        axarr[0].set_ylabel("Roll Tweak Amplitude")
        axarr[1].set_ylabel("sgcdpp")
        axarr[2].set_ylabel("scatter")
        plt.show()

    # Plot the corrected light curve for the number of principal components
    # with the smallest slope between consecutive sgcdpp values. (Earlier
    # revisions kept disabled alternatives that selected the lowest sgcdpp or
    # the lowest rta via np.argmin on the respective list.)
    smallest_slope = np.argmin(np.abs(np.diff(sgcdpp_list)))
    optimalPC = numPrinCompList[smallest_slope]
    fittedCurve = curveFit(pcaMatrix, cadenceSum, optimalPC)[0]
    optimal_lcv = correctedCurve(cadenceSum, fittedCurve)

    # overplot the points flagged by the sigma clip in red on the same figure
    sigmaClip_tf = noise.sigmaClip(optimal_lcv, 5.)
    make_plot(t, optimal_lcv, show=False)
    make_plot(t[sigmaClip_tf],
              optimal_lcv[sigmaClip_tf],
              new=False,
              title="e:%s, c:%s, smallest slope, PC=%s" %
              (epic, campaign, optimalPC),
              marker='ro')

    # NOTE(review): this plots t modulo 4.16 against t, not the light curve;
    # the meaning of 4.16 is not evident from this function -- confirm.
    folded = np.fmod(t, 4.16)
    make_plot(t, folded)

    return np.array(results_list).T