Esempio n. 1
0
def arraystat_2 (table, sid, season=0, rob=True, per=True, flags=0) :
    """ Calculates a complicated number of parameters for a given star.

    Inputs:
      table -- an ATpy table with time-series photometry
      sid -- a WFCAM source ID.

    Optional inputs:
      season -- which season to select (1,2,3, or other=All)
      rob -- also use Robust statistics? (takes longer, default True)
      per -- run period-finding? (takes longer, default True)
      flags -- Maximum ppErrBit quality flags to use (default 0)

    Returns:
      ret -- a data structure containing the computed values, or None
             (after printing a message) if the data cut leaves no rows.

             Top-level attributes:
               N, RA, DEC, chip, one_chip, Stetson, color_slope,
               jpp_max, hpp_max, kpp_max
             Per-band containers ret.j, ret.h, ret.k, ret.jmh, ret.hmk,
             each with:
               data, err, rchi2, mean, rms, min, max, peak_trough,
               mean_err
               (if rob) datar, meanr, rmsr, minr, maxr, peak_troughr
               (if per) lsp, lsp_per, lsp_pow, fx2_per
      """

    s_table = data_cut( table, [sid], season=season, flags=flags )

    if len(s_table) < 1:
        # Single pre-formatted string: same output under the Python 2
        # print statement and the Python 3 print function.
        print("no data for %d!" % sid)
        return None

    jcol = s_table.JAPERMAG3; jerr = s_table.JAPERMAG3ERR
    hcol = s_table.HAPERMAG3; herr = s_table.HAPERMAG3ERR
    kcol = s_table.KAPERMAG3; kerr = s_table.KAPERMAG3ERR
    jmhcol=s_table.JMHPNT   ; jmherr = s_table.JMHPNTERR
    hmkcol=s_table.HMKPNT   ; hmkerr = s_table.HMKPNTERR
    racol= s_table.RA
    decol= s_table.DEC

    date = s_table.MEANMJDOBS

    # Second cut with season=-1 and no `flags` argument (data_cut's own
    # default applies); only used for the ppErrBits maxima at the end.
    messy_table = data_cut( table, [sid], season=-1 )
    jppcol=messy_table.JPPERRBITS
    hppcol=messy_table.HPPERRBITS
    kppcol=messy_table.KPPERRBITS

    # make an empty data structure and just assign it information, then return
    # the object itself!!! then there's no more worrying about indices.
    class Empty():
        pass

    ret = Empty()

    ret.N = len(s_table)
    ret.RA = racol.mean()
    ret.DEC = decol.mean()

    def chip_at(i):
        # Every chip lookup goes through the same degree conversion.
        # Previously only ret.chip converted; the one_chip comparison fed
        # get_chip the raw column values.
        # NOTE(review): assumes get_chip wants degrees, as the original
        # ret.chip call implies -- confirm against get_chip.
        return get_chip(date[i], np.degrees(racol[i]), np.degrees(decol[i]))

    ret.chip = chip_at(0)
    # NOTE(review): the guard is N > 4 although only the first four epochs
    # are inspected; N >= 4 may have been intended. Left unchanged.
    if ret.N > 4:
        ret.one_chip = all(chip_at(i) == ret.chip for i in (1, 2, 3))
    else:
        ret.one_chip = True

    ret.Stetson = stetson.S(jcol, jerr, hcol, herr, kcol, kerr)

    ret.j = Empty();   ret.j.data = jcol;   ret.j.err = jerr
    ret.h = Empty();   ret.h.data = hcol;   ret.h.err = herr
    ret.k = Empty();   ret.k.data = kcol;   ret.k.err = kerr
    ret.jmh = Empty(); ret.jmh.data=jmhcol; ret.jmh.err = jmherr
    ret.hmk = Empty(); ret.hmk.data=hmkcol; ret.hmk.err = hmkerr

    bands = [ ret.j, ret.h, ret.k, ret.jmh, ret.hmk ]

    for b in bands:
        # use b.data, b.err

        b.rchi2 = reduced_chisq( b.data, b.err )

        b.mean = b.data.mean()
        b.rms = b.data.std()
        b.min = b.data.min()
        b.max = b.data.max()
        b.peak_trough = b.max - b.min

        b.mean_err = b.err.mean()

        # Robust quantifiers simply have an "r" at the end of their names
        if rob:
            b.datar = rb.removeoutliers(b.data, 3, niter=2)

            # meanr/stdr are fed the full data, not the clipped b.datar;
            # min/max below come from the clipped array.
            b.meanr = rb.meanr(b.data)
            b.rmsr = rb.stdr(b.data)
            b.minr = b.datar.min()
            b.maxr = b.datar.max()
            b.peak_troughr = b.maxr - b.minr

        # Period finding... is a little dodgy still, and might take forever
        if per:

            b.lsp = lsp(date, b.data, 6., 6.) # apologies if this is cluttered
            # b.lsp[0] is inverted to give a period and b.lsp[1] is stored
            # as the power; Jmax is the index picked by lsp_mask.
            Jmax = lsp_mask(b.lsp[0], b.lsp[1])
            b.lsp_per = 1./ b.lsp[0][Jmax]
            b.lsp_pow = b.lsp[1][Jmax]
            b.fx2_per = 1./ test_analyze( date, b.data, b.err )

    # Finally we'll want to do the whole slope, distance on the JMH graph
    # (until I get the fitting done, we'll have to use hmk and jmh naively)
    ret.color_slope = (ret.jmh.peak_trough / ret.hmk.peak_trough)

    # and the pp_max, using the messy table
    ret.jpp_max = jppcol.max()
    ret.hpp_max = hppcol.max()
    ret.kpp_max = kppcol.max()

    return ret
def remove_nights2 (table, analysis):
    '''
    Removes nights from a table based on the output
    of analyze_nights2.

    Must be called after analyze_nights2.

    For each set of nights, a night is rejected when its
    (mean H-K, mean J-H) point lies more than 3 robust standard
    deviations (combined in quadrature) from the robust mean.

    Parameters
    ----------
    table : atpy.Table
        a data table of time-series photometry from the WFCAM
        Science Archive (WSA). Must include all magnitudes, colors,
        color errors, ppErrBits, and MeanMjdObs information.
    analysis : tuple
        The output of analyze_nights2, unpacked here as
        (nights, hmk, jmh): three parallel sequences that are
        iterated together.

    Returns
    -------
    newtable : atpy.Table
        The same data table, with all data from bad nights
        removed.
    badnights : list
        The timestamps that were rejected, accumulated over all
        night sets.
    cuts : list of tuples
        The parameters used for rejecting nights.
        Order of each tuple:
        (mean_hmk, mean_jmh, spread_hmk, spread_jmh)

    '''

    # First, let's relabel analyze_nights2' output.

    nights, hmk, jmh = analysis #this depends on what analyze_nights outputs

    master_cloudy = []

    cuts = []

    for night_set, mean_hmk, mean_jmh in zip(nights, hmk, jmh):

        # Robust centre and spread of the nightly mean colors.
        middle_hmk = rb.meanr(mean_hmk)
        middle_jmh = rb.meanr(mean_jmh)

        spread_hmk = rb.stdr(mean_hmk)
        spread_jmh = rb.stdr(mean_jmh)

        # Normalised distance of each night from the centre in the
        # (H-K, J-H) plane.
        ellipse = np.sqrt( ((mean_hmk - middle_hmk)/spread_hmk)**2 +
                           ((mean_jmh - middle_jmh)/spread_jmh)**2 )

        cloudy = night_set[ ellipse > 3 ]

        # Diagnostics. Each line is formatted into one string (note the
        # two spaces) so the bytes written match the old multi-argument
        # Python 2 print statements and the code also parses on Python 3.
        print("let's test this.")
        print("middle:  %s" % (middle_hmk,))
        print("spread:  %s" % (spread_hmk,))
        print("n_outliers:  %s" % (cloudy.size,))

        # now let's save cloudy
        master_cloudy.extend(cloudy)

        cuts.append((middle_hmk, middle_jmh,
                     spread_hmk, spread_jmh))

    # now let's remove the nights
    # Build the lookup once: set membership is O(1) per row instead of
    # scanning the whole rejected list for every timestamp.
    rejected = set(master_cloudy)
    # dtype=bool keeps the mask boolean even when the table has no rows.
    keep = np.array([night not in rejected for night in table.MEANMJDOBS],
                    dtype=bool)
    clean_data = table.where(keep)

    return clean_data, master_cloudy, cuts
Esempio n. 3
0
def statcruncher (table, sid, season=0, rob=True, per=True,
                  graded=False, colorslope=False, flags=0) :
    """
    Compute summary statistics for one star, band by band.

    Each band (J, H, K) and each color (J-H, H-K) is cut separately, so
    nights where only some of the magnitudes are usable still contribute
    to the bands they do have.

    Parameters
    ----------
    table : atpy.Table
        Table with time-series photometry.
    sid : int
        13-digit WFCAM source ID of the star.
    season : int, optional
        Observing season (1, 2 or 3). Any other value means no
        time cut; this is the default.
    rob : bool, optional
        Also compute outlier-clipped ("robust") statistics. These get
        an "r" suffix on the attribute name. Default True.
    per : bool, optional
        Run the period searches (lsp and test_analyze) on every band
        with more than two points. Default True.
    graded : bool, optional
        Also compute the graded Stetson index via
        graded_Stetson_machine; the data must already have been graded
        by night_cleanser.null_cleanser_grader(). Default False.
    colorslope : bool, optional
        Fit color slopes for J vs J-H, K vs H-K and J-H vs H-K.
        The data should be color-error-corrected first. Default False.
    flags : int, optional
        Maximum ppErrBit quality flag accepted per band (default 0).

    Returns
    -------
    ret : data structure, or None
        None (after printing a message) if the star has no rows.
        Otherwise an attribute bag with:
          N_j, N_h, N_k and N_<band>_noflag / _info / _warn counts;
          RA, DEC; Stetson, Stetson_choice, Stetson_N
          (plus graded_* versions if `graded`);
          pstar_mean, pstar_median, pstar_rms;
          per-band bags ret.j, ret.h, ret.k, ret.jmh, ret.hmk holding
          data, err, date, N and, for non-empty bands, the statistics
          (e.g. "ret.j.mean", "ret.k.rmsr", "ret.h.lsp_per");
          jjh_slope, khk_slope, jhk_slope and their *_err
          (if `colorslope`).

    """

    star_rows = data_cut(table, sid, season=season)

    if len(star_rows) < 1:
        print("no data for %d!" % sid)
        return None

    # Attribute bag: results are hung on instances of this class so the
    # caller never has to remember positional indices.
    class Empty():
        pass

    def new_band(rows, mag_name, err_name):
        # Package one band's dates, values, errors and row count together.
        band = Empty()
        band.date = rows.MEANMJDOBS
        band.data = getattr(rows, mag_name)
        band.err = getattr(rows, err_name)
        band.N = len(rows)
        return band

    def plain_stats(band):
        # Ordinary (unclipped) statistics of the values and their errors.
        values, errors = band.data, band.err

        band.rchi2 = reduced_chisq(values, errors)

        band.mean = values.mean()
        band.median = np.median(values)
        band.rms = values.std()
        band.min = values.min()
        band.max = values.max()
        band.range = band.max - band.min

        band.err_mean = errors.mean()
        band.err_median = np.median(errors)
        band.err_rms = errors.std()
        band.err_min = errors.min()
        band.err_max = errors.max()
        band.err_range = band.err_max - band.err_min

    def robust_stats(band):
        # Same quantities after outlier removal; names end in "r".
        kept, kept_idx = rb.removeoutliers(band.data, 3, niter=2,
                                           retind=True)
        band.datar = kept
        band.indr = kept_idx
        band.errr = band.err[kept_idx]

        # The robust mean/median/std helpers are handed the full data;
        # min/max come from the clipped array.
        band.meanr = rb.meanr(band.data)
        band.medianr = rb.medianr(band.data)
        band.rmsr = rb.stdr(band.data)
        band.minr = kept.min()
        band.maxr = kept.max()
        band.ranger = band.maxr - band.minr

        clipped_err = band.errr
        band.err_meanr = clipped_err.mean()
        band.err_medianr = np.median(clipped_err)
        band.err_rmsr = clipped_err.std()
        band.err_minr = clipped_err.min()
        band.err_maxr = clipped_err.max()
        band.err_ranger = band.err_maxr - band.err_minr

    def period_search(band):
        # Periodogram peak plus the test_analyze best frequency.
        hifac = lsp_tuning(band.date)

        band.lsp = lsp(band.date, band.data, 6., hifac)
        peak = lsp_mask(band.lsp[0], band.lsp[1])
        band.lsp_per = 1./ band.lsp[0][peak]
        band.lsp_pow = band.lsp[1][peak]
        band.lsp_sig = getSignificance(band.lsp[0], band.lsp[1],
                                       band.lsp[2], 6.)[peak]

        best_freq, chimin = test_analyze(band.date, band.data, band.err,
                                         ret_chimin=True)
        band.fx2_per = 1./best_freq
        band.fx2_chimin = chimin

    # Per-band cuts: first without a flag limit (for the flag census),
    # then restricted to max_flag=flags (for the statistics).
    raw_j, raw_h, raw_k = [band_cut(star_rows, name)
                           for name in ('j', 'h', 'k')]
    good_j, good_h, good_k = [band_cut(star_rows, name, max_flag=flags)
                              for name in ('j', 'h', 'k')]

    # Colors need both contributing bands to pass the cut.
    good_jmh = band_cut(good_j, 'h', max_flag=flags)
    good_hmk = band_cut(good_h, 'k', max_flag=flags)

    # All three bands at once; only the J-H vs H-K slope uses this.
    good_jhk = band_cut(good_jmh, 'k', max_flag=flags)

    ret = Empty()

    ret.j = new_band(good_j, 'JAPERMAG3', 'JAPERMAG3ERR')
    ret.h = new_band(good_h, 'HAPERMAG3', 'HAPERMAG3ERR')
    ret.k = new_band(good_k, 'KAPERMAG3', 'KAPERMAG3ERR')
    ret.jmh = new_band(good_jmh, 'JMHPNT', 'JMHPNTERR')
    ret.hmk = new_band(good_hmk, 'HMKPNT', 'HMKPNTERR')

    # Positions, keeping only values inside the sanity windows.
    ra_all = star_rows.RA
    dec_all = star_rows.DEC
    ra_ok = ra_all[(ra_all > 0) & (ra_all < 7)]
    dec_ok = dec_all[(dec_all > -4) & (dec_all < 4)]

    # Number of nights with an accepted observation in each band.
    ret.N_j = ret.j.N
    ret.N_h = ret.h.N
    ret.N_k = ret.k.N

    # Flag census on the uncut band tables: zero, 1..255, and >= 256.
    for tag, rows in (('j', raw_j), ('h', raw_h), ('k', raw_k)):
        bits = getattr(rows, tag.upper() + 'PPERRBITS')
        setattr(ret, 'N_%s_noflag' % tag, len(bits[bits == 0]))
        setattr(ret, 'N_%s_info' % tag,
                len(bits[(bits < 256) & (bits > 0)]))
        setattr(ret, 'N_%s_warn' % tag, len(bits[bits >= 256]))

    # Mean position of the source.
    ret.RA = ra_ok.mean()
    ret.DEC = dec_ok.mean()

    # Stetson index, which band combination it used, and on how many nights.
    ret.Stetson, ret.Stetson_choice, ret.Stetson_N = (
        Stetson_machine(star_rows, flags) )

    if graded:
        # Same three outputs from the grade-weighted variant.
        (ret.graded_Stetson,
         ret.graded_Stetson_choice,
         ret.graded_Stetson_N) = graded_Stetson_machine(star_rows, flags)

    # PSTAR summary over every row of the star.
    pstar = star_rows.PSTAR
    ret.pstar_mean = pstar.mean()
    ret.pstar_median = np.median(pstar)
    ret.pstar_rms = pstar.std()

    for b in (ret.j, ret.h, ret.k, ret.jmh, ret.hmk):
        # An empty band keeps only data/err/date/N.
        if b.N == 0:
            continue

        plain_stats(b)

        if rob:
            robust_stats(b)

        # Period finding... is a little dodgy still, and might take forever
        if per==True and b.N > 2:
            period_search(b)

    if colorslope:
        # (result prefix, table, x column, y column); the error columns
        # are the same names with "ERR" appended.
        slope_jobs = (
            ('jjh', good_jmh, 'JMHPNT', 'JAPERMAG3'),   # J vs J-H
            ('khk', good_hmk, 'HMKPNT', 'KAPERMAG3'),   # K vs H-K
            ('jhk', good_jhk, 'HMKPNT', 'JMHPNT'),      # J-H vs H-K
        )
        for prefix, rows, x_name, y_name in slope_jobs:
            fitted, _unused, fitted_err = slope(
                getattr(rows, x_name), getattr(rows, y_name),
                getattr(rows, x_name + 'ERR'), getattr(rows, y_name + 'ERR'),
                verbose=False)
            setattr(ret, prefix + '_slope', fitted)
            setattr(ret, prefix + '_slope_err', fitted_err)

    # The per-band maximum ppErrBits (pp_max) values are not computed
    # here; they are slated for a re-implementation.

    return ret