Python removeoutliers Beispiele

Programmiersprache: Python

Namespace / Paketname: robust

Methode / Funktion: removeoutliers

Beispiele auf hotexamples.com: 2

Python removeoutliers - 2 Beispiele gefunden. Dies sind die am besten bewerteten Python Beispiele für die robust.removeoutliers, die aus Open Source-Projekten extrahiert wurden. Sie können Beispiele bewerten, um die Qualität der Beispiele zu verbessern.

Beispiel #1

Datei anzeigen

Datei: spreadsheet.py Projekt: tomr-stargazer/wuvars-proto

def arraystat_2 (table, sid, season=0, rob=True, per=True, flags=0) :
    """ Calculates a complicated number of parameters for a given star.

    Inputs:
      table -- an ATpy table with time-series photometry
      sid -- a WFCAM source ID.
      
    Optional inputs:
      season -- which season to select (1,2,3, or other=All)
      rob -- also use Robust statistics? (takes longer, default True)
      per -- run period-finding? (takes longer, default True)
      flags -- Maximum ppErrBit quality flags to use (default 0)

    Returns:
      ret -- a data structure containing the computed values.
      """
    
    s_table = data_cut( table, [sid], season=season, flags=flags )

    if len(s_table) < 1:
        print "no data for %d!" % sid
        return None
    
    jcol = s_table.JAPERMAG3; jerr = s_table.JAPERMAG3ERR
    hcol = s_table.HAPERMAG3; herr = s_table.HAPERMAG3ERR
    kcol = s_table.KAPERMAG3; kerr = s_table.KAPERMAG3ERR
    jmhcol=s_table.JMHPNT   ; jmherr = s_table.JMHPNTERR
    hmkcol=s_table.HMKPNT   ; hmkerr = s_table.HMKPNTERR
    racol= s_table.RA
    decol= s_table.DEC

    date = s_table.MEANMJDOBS 

    messy_table = data_cut( table, [sid], season=-1 )
    jppcol=messy_table.JPPERRBITS
    hppcol=messy_table.HPPERRBITS
    kppcol=messy_table.KPPERRBITS

    # make an empty data structure and just assign it information, then return 
    # the object itself!!! then there's no more worrying about indices.
    class Empty():
        pass

    ret = Empty()
    
    ret.N = len(s_table)
    ret.RA = racol.mean()
    ret.DEC = decol.mean()
    
    ret.chip = get_chip(date[0], np.degrees(racol[0]), np.degrees(decol[0]))
    if ret.N > 4:
        ret.one_chip = ( get_chip(date[0], racol[0], decol[0]) ==
                         get_chip(date[1], racol[1], decol[1]) ==
                         get_chip(date[2], racol[2], decol[2]) ==
                         get_chip(date[3], racol[3], decol[3]) )
    else:
        ret.one_chip = True
    
    ret.Stetson = stetson.S(jcol, jerr, hcol, herr, kcol, kerr)
    
    ret.j = Empty();   ret.j.data = jcol;   ret.j.err = jerr
    ret.h = Empty();   ret.h.data = hcol;   ret.h.err = herr
    ret.k = Empty();   ret.k.data = kcol;   ret.k.err = kerr
    ret.jmh = Empty(); ret.jmh.data=jmhcol; ret.jmh.err = jmherr
    ret.hmk = Empty(); ret.hmk.data=hmkcol; ret.hmk.err = hmkerr

    bands = [ ret.j, ret.h, ret.k, ret.jmh, ret.hmk ]

    for b in bands:
        # use b.data, b.err
        
        b.rchi2 = reduced_chisq( b.data, b.err )

        b.mean = b.data.mean()
        b.rms = b.data.std()
        b.min = b.data.min()
        b.max = b.data.max()
        b.peak_trough = b.max - b.min

        b.mean_err = b.err.mean()

        # Robust quantifiers simply have an "r" at the end of their names
        if rob:
            b.datar = rb.removeoutliers(b.data, 3, niter=2)
            
            b.meanr = rb.meanr(b.data)
            b.rmsr = rb.stdr(b.data)
            b.minr = b.datar.min()
            b.maxr = b.datar.max()
            b.peak_troughr = b.maxr - b.minr

        # Period finding... is a little dodgy still, and might take forever
        if per:
            
            b.lsp = lsp(date, b.data, 6., 6.) # apologies if this is cluttered
            Jmax = lsp_mask(b.lsp[0], b.lsp[1])
            b.lsp_per = 1./ b.lsp[0][Jmax]
            b.lsp_pow = b.lsp[1][Jmax]
            b.fx2_per = 1./ test_analyze( date, b.data, b.err )

    # Finally we'll want to do the whole slope, distance on the JMH graph
    # (until I get the fitting done, we'll have to use hmk and jmh naively)
    ret.color_slope = (ret.jmh.peak_trough / ret.hmk.peak_trough)
    


    # and the pp_max, using the messy table
    ret.jpp_max = jppcol.max()
    ret.hpp_max = hppcol.max()
    ret.kpp_max = kppcol.max()

    return ret

Beispiel #2

Datei anzeigen

Datei: spread3.py Projekt: tomr-stargazer/wuvars-proto

def statcruncher (table, sid, season=0, rob=True, per=True, 
                  graded=False, colorslope=False, flags=0) :
    """ 
    Calculates several statistical properties for a given star.

    Will work with "lonely" datapoints (i.e. not all JHK mags are 
    well-defined). Optionally works with graded data, too!

    Parameters
    ----------
    table : atpy.Table
        Table with time-series photometry
    sid : int
        13-digit WFCAM source ID of star to plot
    season : int, optional
        Which observing season of our dataset (1, 2, 3, or all).
        Any value that is not the integers (1, 2, or 3) will be 
        treated as "no season", and no time-cut will be made.
        Note that this is the default behavior.
    rob : bool, optional 
        Use robust statistics, in addition to normal ones?
        (takes longer, default True)
    per : bool, optional 
        Run period-finding? Uses fast chi-squared and lomb-scargle.
        (takes longer, default True)
    graded : bool, optional
        Also calculate Stetson indices using quality grades as weights?
        Uses stetson_graded; requires that the data has been graded by
        night_cleanser.null_cleanser_grader().
    colorslope : bool, optional
        Calculate color slopes? Runs them over (JvJ-H, KvH-K, J-HvH-K).
        Make sure your data has been color-error-corrected! Default False.
    flags : int, optional 
        Maximum ppErrBit quality flags to use (default 0)

    Returns
    -------
    ret : data structure 
        Contains the computed values.
        They can be accessed as attributes 
        (e.g., "ret.j_mean" or "ret.Stetson").

    """
    
    s_table = data_cut ( table, sid, season=season)

    if len(s_table) < 1:
        print "no data for %d!" % sid
        return None

    # First, let's compute single-band statistics. This will require
    # separate data_cuts on each band.

    full_jtable = band_cut(s_table, 'j')
    full_htable = band_cut(s_table, 'h')
    full_ktable = band_cut(s_table, 'k')

    j_table = band_cut(s_table, 'j', max_flag=flags)
    h_table = band_cut(s_table, 'h', max_flag=flags)
    k_table = band_cut(s_table, 'k', max_flag=flags)

    jmh_table = band_cut(j_table, 'h', max_flag=flags)
    hmk_table = band_cut(h_table, 'k', max_flag=flags)
    
    # jhk_table used only for colorslope
    jhk_table = band_cut( jmh_table, 'k', max_flag=flags)

    # get a date (x-axis) for each 
    jdate = j_table.MEANMJDOBS
    hdate = h_table.MEANMJDOBS
    kdate = k_table.MEANMJDOBS
    jmhdate = jmh_table.MEANMJDOBS
    hmkdate = hmk_table.MEANMJDOBS
#    date = s_table.MEANMJDOBS 
    
    # get a magnitude and magnitude error for each band
    jcol = j_table.JAPERMAG3; jerr = j_table.JAPERMAG3ERR
    hcol = h_table.HAPERMAG3; herr = h_table.HAPERMAG3ERR
    kcol = k_table.KAPERMAG3; kerr = k_table.KAPERMAG3ERR
    jmhcol= jmh_table.JMHPNT; jmherr = jmh_table.JMHPNTERR
    hmkcol= hmk_table.HMKPNT; hmkerr = hmk_table.HMKPNTERR

    # get the RA and DEC columns, checking for sensible values
    racol= s_table.RA[(s_table.RA > 0) & (s_table.RA < 7)]
    decol= s_table.DEC[(s_table.DEC > -4) & (s_table.DEC < 4)]

    # Now let's get some ability to track errorful data.
    # messy_table_j = band_cut( s_table, 'j')
    # messy_table_h = band_cut( s_table, 'h')
    # messy_table_k = band_cut( s_table, 'k')
    # jppcol = messy_table_j.JPPERRBITS
    # hppcol = messy_table_h.HPPERRBITS
    # kppcol = messy_table_k.KPPERRBITS

    # make an empty data structure and just assign it information, then return 
    # the object itself! then there's no more worrying about indices.
    class Empty():
        pass

    ret = Empty()
    
    # How many nights have observations in each band?
    ret.N_j = len(j_table)
    ret.N_h = len(h_table)
    ret.N_k = len(k_table)

    # What's the distribution of flags and nights?
    js = full_jtable.JPPERRBITS
    hs = full_htable.HPPERRBITS
    ks = full_ktable.KPPERRBITS

    ret.N_j_noflag = len(js[js == 0])
    ret.N_h_noflag = len(hs[hs == 0])
    ret.N_k_noflag = len(ks[ks == 0])

    ret.N_j_info = len(js[(js < 256) & (js > 0)])
    ret.N_h_info = len(hs[(hs < 256) & (hs > 0)])
    ret.N_k_info = len(ks[(ks < 256) & (ks > 0)])

    ret.N_j_warn = len(js[ js >= 256 ])
    ret.N_h_warn = len(hs[ hs >= 256 ])
    ret.N_k_warn = len(ks[ ks >= 256 ])


    # Mean position of this source
    ret.RA = racol.mean()
    ret.DEC = decol.mean()
    
    # Calculate the Stetson index...
    S, choice, stetson_nights = Stetson_machine (s_table, flags)
    
    ret.Stetson = S
    ret.Stetson_choice = choice
    ret.Stetson_N = stetson_nights

    if graded:
        # Calculate the graded Stetson index...
        g_S, g_choice, g_stetson_nights = (
            graded_Stetson_machine (s_table, flags) )
    
        ret.graded_Stetson = g_S
        ret.graded_Stetson_choice = g_choice
        ret.graded_Stetson_N = g_stetson_nights


    # Calculate PSTAR parameters
    ret.pstar_mean = s_table.PSTAR.mean()
    ret.pstar_median = np.median(s_table.PSTAR)
    ret.pstar_rms = s_table.PSTAR.std()

    # Create parallel data structures for each band, so we can iterate
    ret.j = Empty(); ret.j.data = jcol; ret.j.err = jerr; ret.j.date = jdate   
    ret.h = Empty(); ret.h.data = hcol; ret.h.err = herr; ret.h.date = hdate
    ret.k = Empty(); ret.k.data = kcol; ret.k.err = kerr; ret.k.date = kdate
    ret.jmh = Empty(); ret.jmh.data=jmhcol; ret.jmh.err = jmherr 
    ret.hmk = Empty(); ret.hmk.data=hmkcol; ret.hmk.err = hmkerr
    ret.jmh.date = jmhdate; ret.hmk.date = hmkdate

    ret.j.N = ret.N_j ; ret.h.N = ret.N_h ; ret.k.N = ret.N_k
    ret.jmh.N = len(jmh_table) ; ret.hmk.N = len(hmk_table)

    bands = [ ret.j, ret.h, ret.k, ret.jmh, ret.hmk ]

    for b in bands:
        # use b.data, b.err
        
        # if this band is empty, don't try to do the following assignments
        if b.N == 0: continue

        b.rchi2 = reduced_chisq( b.data, b.err )

        b.mean = b.data.mean()
        b.median = np.median(b.data) # dao
        b.rms = b.data.std()
        b.min = b.data.min()
        b.max = b.data.max()
        b.range = b.max - b.min

        b.err_mean = b.err.mean() #dao
        b.err_median = np.median(b.err) #dao
        b.err_rms = b.err.std() #dao
        b.err_min = b.err.min() #dao
        b.err_max = b.err.max() #dao
        b.err_range = b.err_max - b.err_min #dao


        # Robust quantifiers simply have an "r" at the end of their names
        if rob:
            b.datar, b.indr = rb.removeoutliers(b.data, 3, niter=2, retind=True)
            b.errr = b.err[b.indr]
            
            b.meanr = rb.meanr(b.data)
            b.medianr = rb.medianr(b.data) # dao
            b.rmsr = rb.stdr(b.data)
            b.minr = b.datar.min()
            b.maxr = b.datar.max()
            b.ranger = b.maxr - b.minr

            b.err_meanr = b.errr.mean() # dao
            b.err_medianr = np.median(b.errr) #dao
            b.err_rmsr = b.errr.std() #dao
            b.err_minr = b.errr.min() #dao
            b.err_maxr = b.errr.max() #dao
            b.err_ranger = b.err_maxr - b.err_minr #dao

        # Period finding... is a little dodgy still, and might take forever
        if per==True and b.N > 2:

            hifac = lsp_tuning(b.date)
            
            b.lsp = lsp(b.date, b.data, 6., hifac) 
            Jmax = lsp_mask(b.lsp[0], b.lsp[1])
            b.lsp_per = 1./ b.lsp[0][Jmax]
            b.lsp_pow = b.lsp[1][Jmax]
            b.lsp_sig = getSignificance(b.lsp[0], b.lsp[1], b.lsp[2], 6.)[Jmax]

            best_freq, chimin = test_analyze( b.date, b.data, b.err, 
                                              ret_chimin=True )

            b.fx2_per, b.fx2_chimin = 1./best_freq, chimin
            

    if colorslope:
        # J vs J-H : use jmh_table exclusively
        (ret.jjh_slope, a, ret.jjh_slope_err) = (
            slope( jmh_table.JMHPNT, jmh_table.JAPERMAG3, 
                   jmh_table.JMHPNTERR, jmh_table.JAPERMAG3ERR, 
                   verbose=False) )
        # K vs H-K : use hmk_table exclusively
        (ret.khk_slope, a, ret.khk_slope_err) = (
            slope( hmk_table.HMKPNT, hmk_table.KAPERMAG3, 
                   hmk_table.HMKPNTERR, hmk_table.KAPERMAG3ERR,
                   verbose=False) )
        # J-H vs H-K : use jhk_table exclusively
        (ret.jhk_slope, a, ret.jhk_slope_err) = (
            slope( jhk_table.HMKPNT, jhk_table.JMHPNT, 
                   jhk_table.HMKPNTERR, jhk_table.JMHPNTERR,
                   verbose=False) )
        
    # and the pp_max, using the messy table
    # (slated for a re-implementation)
    # ret.jpp_max = jppcol.max()
    # ret.hpp_max = hppcol.max()
    # ret.kpp_max = kppcol.max()

    return ret