def remove_nights2(table, analysis, n_sigma=3):
    '''
    Removes nights from a table based on the output of analyze_nights2.

    Must be called after analyze_nights2.

    For each group of nights in `analysis`, the robust center
    (`rb.meanr`) and robust spread (`rb.stdr`) of the group's `hmk` and
    `jmh` values are computed.  Every night then gets a normalized
    distance from that center,

        sqrt( ((hmk - middle_hmk) / spread_hmk)**2 +
              ((jmh - middle_jmh) / spread_jmh)**2 )

    and nights whose distance exceeds `n_sigma` are rejected.

    Parameters
    ----------
    table : atpy.Table
        a data table of time-series photometry from the WFCAM Science
        Archive (WSA). Must include all magnitudes, colors, color errors,
        ppErrBits, and MeanMjdObs information.
    analysis : tuple
        The output of analyze_nights2.  It is unpacked as
        ``(nights, hmk, jmh)``: three parallel sequences that are walked
        together, one entry per group of nights.  Each `nights` entry is
        indexed with a boolean mask, so it must be an ndarray.
    n_sigma : float, optional
        Rejection threshold on the normalized distance (default 3).

    Returns
    -------
    newtable : atpy.Table
        The same data table, with all data from bad nights removed.
    badnights : list
        The timestamps that were rejected.
    cuts : list of tuples
        The parameters used for rejecting nights, one tuple per group.
        Order of each tuple:
        (middle_hmk, middle_jmh, spread_hmk, spread_jmh)

    '''

    # First, let's relabel analyze_nights2' output.
    nights, hmk, jmh = analysis

    master_cloudy = []
    cuts = []

    for night_set, mean_hmk, mean_jmh in zip(nights, hmk, jmh):

        # Robust statistics keep the outlying nights themselves from
        # dragging the center and the spread around.
        middle_hmk = rb.meanr(mean_hmk)
        middle_jmh = rb.meanr(mean_jmh)

        spread_hmk = rb.stdr(mean_hmk)
        spread_jmh = rb.stdr(mean_jmh)

        # NOTE: a zero spread makes this division produce inf/nan; a nan
        # distance compares False against the threshold, so such a night
        # is kept.
        ellipse = np.sqrt(((mean_hmk - middle_hmk) / spread_hmk) ** 2 +
                          ((mean_jmh - middle_jmh) / spread_jmh) ** 2)

        cloudy = night_set[ellipse > n_sigma]

        # Diagnostics.  The "%s %s" form reproduces the output of the old
        # two-argument print statement on both Python 2 and Python 3.
        print("let's test this.")
        print("%s %s" % ("middle: ", middle_hmk))
        print("%s %s" % ("spread: ", spread_hmk))
        print("%s %s" % ("n_outliers: ", cloudy.size))

        # now let's save cloudy
        master_cloudy.extend(cloudy)
        cuts.append((middle_hmk, middle_jmh, spread_hmk, spread_jmh))

    # now let's remove the nights: one vectorized membership test instead
    # of a Python-level list search per table row.
    keep = ~np.isin(table.MEANMJDOBS, master_cloudy)
    clean_data = table.where(keep)

    return clean_data, master_cloudy, cuts
def statcruncher(table, sid, season=0, rob=True, per=True,
                 graded=False, colorslope=False, flags=0):
    """
    Calculates several statistical properties for a given star.

    Will work with "lonely" datapoints (i.e. not all JHK mags are
    well-defined). Optionally works with graded data, too!

    Parameters
    ----------
    table : atpy.Table
        Table with time-series photometry
    sid : int
        13-digit WFCAM source ID of star to plot
    season : int, optional
        Which observing season of our dataset (1, 2, 3, or all).
        Any value that is not the integers (1, 2, or 3) will be
        treated as "no season", and no time-cut will be made.
        Note that this is the default behavior.
    rob : bool, optional
        Use robust statistics, in addition to normal ones?
        (takes longer, default True)
    per : bool, optional
        Run period-finding? Uses fast chi-squared and lomb-scargle.
        Only attempted for bands with more than two datapoints.
        (takes longer, default True)
    graded : bool, optional
        Also calculate Stetson indices using quality grades as weights?
        Uses stetson_graded; requires that the data has been graded by
        night_cleanser.null_cleanser_grader().
    colorslope : bool, optional
        Calculate color slopes? Runs them over (JvJ-H, KvH-K, J-HvH-K).
        Make sure your data has been color-error-corrected! Default False.
    flags : int, optional
        Maximum ppErrBit quality flags to use (default 0)

    Returns
    -------
    ret : data structure, or None
        Contains the computed values. They can be accessed as attributes
        (e.g., "ret.j.mean" or "ret.Stetson").  Per-band values live on
        the sub-objects ret.j, ret.h, ret.k, ret.jmh and ret.hmk; a band
        with no datapoints only carries data/err/date/N.
        None is returned (after printing a message) when the star has no
        data at all.

    """

    s_table = data_cut(table, sid, season=season)

    if len(s_table) < 1:
        print("no data for %d!" % sid)
        return None

    # Unfiltered per-band tables: used only to count quality flags.
    full_jtable = band_cut(s_table, 'j')
    full_htable = band_cut(s_table, 'h')
    full_ktable = band_cut(s_table, 'k')

    # Flag-filtered per-band tables: used for every statistic below.
    j_table = band_cut(s_table, 'j', max_flag=flags)
    h_table = band_cut(s_table, 'h', max_flag=flags)
    k_table = band_cut(s_table, 'k', max_flag=flags)

    # Colors need both of their bands to survive the cut.
    jmh_table = band_cut(j_table, 'h', max_flag=flags)
    hmk_table = band_cut(h_table, 'k', max_flag=flags)

    # get the RA and DEC columns, checking for sensible values
    racol = s_table.RA[(s_table.RA > 0) & (s_table.RA < 7)]
    decol = s_table.DEC[(s_table.DEC > -4) & (s_table.DEC < 4)]

    # make an empty data structure and just assign it information, then
    # return the object itself! then there's no more worrying about indices.
    class Empty():
        pass

    ret = Empty()

    # How many nights have observations in each band?
    ret.N_j = len(j_table)
    ret.N_h = len(h_table)
    ret.N_k = len(k_table)

    # What's the distribution of flags and nights?
    js = full_jtable.JPPERRBITS
    hs = full_htable.HPPERRBITS
    ks = full_ktable.KPPERRBITS

    ret.N_j_noflag = len(js[js == 0])
    ret.N_h_noflag = len(hs[hs == 0])
    ret.N_k_noflag = len(ks[ks == 0])

    ret.N_j_info = len(js[(js < 256) & (js > 0)])
    ret.N_h_info = len(hs[(hs < 256) & (hs > 0)])
    ret.N_k_info = len(ks[(ks < 256) & (ks > 0)])

    ret.N_j_warn = len(js[js >= 256])
    ret.N_h_warn = len(hs[hs >= 256])
    ret.N_k_warn = len(ks[ks >= 256])

    # Mean position of this source
    ret.RA = racol.mean()
    ret.DEC = decol.mean()

    # Calculate the Stetson index...
    S, choice, stetson_nights = Stetson_machine(s_table, flags)

    ret.Stetson = S
    ret.Stetson_choice = choice
    ret.Stetson_N = stetson_nights

    if graded:
        # Calculate the graded Stetson index...
        g_S, g_choice, g_stetson_nights = (
            graded_Stetson_machine(s_table, flags))

        ret.graded_Stetson = g_S
        ret.graded_Stetson_choice = g_choice
        ret.graded_Stetson_N = g_stetson_nights

    # Calculate PSTAR parameters
    ret.pstar_mean = s_table.PSTAR.mean()
    ret.pstar_median = np.median(s_table.PSTAR)
    ret.pstar_rms = s_table.PSTAR.std()

    # Create parallel data structures for each band, so we can iterate.
    # Each entry: (attribute on ret, source table, value column, error column)
    band_sources = (
        ('j', j_table, 'JAPERMAG3', 'JAPERMAG3ERR'),
        ('h', h_table, 'HAPERMAG3', 'HAPERMAG3ERR'),
        ('k', k_table, 'KAPERMAG3', 'KAPERMAG3ERR'),
        ('jmh', jmh_table, 'JMHPNT', 'JMHPNTERR'),
        ('hmk', hmk_table, 'HMKPNT', 'HMKPNTERR'),
    )

    bands = []
    for band_name, band_table, data_name, err_name in band_sources:
        b = Empty()
        b.data = getattr(band_table, data_name)
        b.err = getattr(band_table, err_name)
        b.date = band_table.MEANMJDOBS
        b.N = len(band_table)
        setattr(ret, band_name, b)
        bands.append(b)

    for b in bands:
        # if this band is empty, don't try to do the following assignments
        if b.N == 0:
            continue

        b.rchi2 = reduced_chisq(b.data, b.err)

        b.mean = b.data.mean()
        b.median = np.median(b.data)
        b.rms = b.data.std()
        b.min = b.data.min()
        b.max = b.data.max()
        b.range = b.max - b.min

        b.err_mean = b.err.mean()
        b.err_median = np.median(b.err)
        b.err_rms = b.err.std()
        b.err_min = b.err.min()
        b.err_max = b.err.max()
        b.err_range = b.err_max - b.err_min

        # Robust quantifiers simply have an "r" at the end of their names
        if rob:
            b.datar, b.indr = rb.removeoutliers(b.data, 3, niter=2,
                                                retind=True)
            # errors of the datapoints that survived outlier removal
            b.errr = b.err[b.indr]

            b.meanr = rb.meanr(b.data)
            b.medianr = rb.medianr(b.data)
            b.rmsr = rb.stdr(b.data)
            b.minr = b.datar.min()
            b.maxr = b.datar.max()
            b.ranger = b.maxr - b.minr

            b.err_meanr = b.errr.mean()
            b.err_medianr = np.median(b.errr)
            b.err_rmsr = b.errr.std()
            b.err_minr = b.errr.min()
            b.err_maxr = b.errr.max()
            b.err_ranger = b.err_maxr - b.err_minr

        # Period finding... is a little dodgy still, and might take forever
        if per and b.N > 2:

            hifac = lsp_tuning(b.date)

            b.lsp = lsp(b.date, b.data, 6., hifac)
            Jmax = lsp_mask(b.lsp[0], b.lsp[1])
            # b.lsp[0][Jmax] is inverted to get a period, so b.lsp[0] is
            # treated as the frequency axis and b.lsp[1] as the power.
            b.lsp_per = 1. / b.lsp[0][Jmax]
            b.lsp_pow = b.lsp[1][Jmax]
            b.lsp_sig = getSignificance(b.lsp[0], b.lsp[1],
                                        b.lsp[2], 6.)[Jmax]

            best_freq, chimin = test_analyze(b.date, b.data, b.err,
                                             ret_chimin=True)

            b.fx2_per, b.fx2_chimin = 1. / best_freq, chimin

    if colorslope:
        # J vs J-H : use jmh_table exclusively
        (ret.jjh_slope, _, ret.jjh_slope_err) = (
            slope(jmh_table.JMHPNT, jmh_table.JAPERMAG3,
                  jmh_table.JMHPNTERR, jmh_table.JAPERMAG3ERR,
                  verbose=False))

        # K vs H-K : use hmk_table exclusively
        (ret.khk_slope, _, ret.khk_slope_err) = (
            slope(hmk_table.HMKPNT, hmk_table.KAPERMAG3,
                  hmk_table.HMKPNTERR, hmk_table.KAPERMAG3ERR,
                  verbose=False))

        # J-H vs H-K : needs all three bands, so cut once more.  This
        # table is used nowhere else, hence it is only built here.
        jhk_table = band_cut(jmh_table, 'k', max_flag=flags)
        (ret.jhk_slope, _, ret.jhk_slope_err) = (
            slope(jhk_table.HMKPNT, jhk_table.JMHPNT,
                  jhk_table.HMKPNTERR, jhk_table.JMHPNTERR,
                  verbose=False))

    return ret
def arraystat_2(table, sid, season=0, rob=True, per=True, flags=0):
    """
    Calculates a complicated number of parameters for a given star.

    Inputs:
      table -- an ATpy table with time-series photometry
      sid -- a WFCAM source ID.

    Optional inputs:
      season -- which season to select (1,2,3, or other=All)
      rob -- also use Robust statistics? (takes longer, default True)
      per -- run period-finding? (takes longer, default True)
      flags -- Maximum ppErrBit quality flags to use (default 0)

    Returns:
      ret -- a data structure containing the computed values, or None
             (after printing a message) when the star has no data.
             Per-band values live on ret.j, ret.h, ret.k, ret.jmh and
             ret.hmk.
    """

    s_table = data_cut(table, [sid], season=season, flags=flags)

    if len(s_table) < 1:
        print("no data for %d!" % sid)
        return None

    racol = s_table.RA
    decol = s_table.DEC
    date = s_table.MEANMJDOBS

    # A second cut without the `flags` argument and with season=-1, used
    # only to report the largest ppErrBits value seen in each band.
    messy_table = data_cut(table, [sid], season=-1)
    jppcol = messy_table.JPPERRBITS
    hppcol = messy_table.HPPERRBITS
    kppcol = messy_table.KPPERRBITS

    # make an empty data structure and just assign it information, then
    # return the object itself!!! then there's no more worrying about indices.
    class Empty():
        pass

    ret = Empty()

    ret.N = len(s_table)
    ret.RA = racol.mean()
    ret.DEC = decol.mean()

    def chip_of(i):
        # NOTE(review): the coordinates are converted with np.degrees
        # here, as the original `ret.chip` computation did; the original
        # `one_chip` comparison passed the raw column values instead.
        # Presumably get_chip wants degrees -- confirm against get_chip.
        return get_chip(date[i], np.degrees(racol[i]), np.degrees(decol[i]))

    ret.chip = chip_of(0)

    # Compare the chips of the first four datapoints; this only needs
    # four rows (indices 0..3) to exist.
    if ret.N >= 4:
        ret.one_chip = all(chip_of(i) == ret.chip for i in range(1, 4))
    else:
        ret.one_chip = True

    ret.Stetson = stetson.S(s_table.JAPERMAG3, s_table.JAPERMAG3ERR,
                            s_table.HAPERMAG3, s_table.HAPERMAG3ERR,
                            s_table.KAPERMAG3, s_table.KAPERMAG3ERR)

    # Each entry: (attribute on ret, value column, error column)
    band_columns = (
        ('j', 'JAPERMAG3', 'JAPERMAG3ERR'),
        ('h', 'HAPERMAG3', 'HAPERMAG3ERR'),
        ('k', 'KAPERMAG3', 'KAPERMAG3ERR'),
        ('jmh', 'JMHPNT', 'JMHPNTERR'),
        ('hmk', 'HMKPNT', 'HMKPNTERR'),
    )

    bands = []
    for band_name, data_name, err_name in band_columns:
        b = Empty()
        b.data = getattr(s_table, data_name)
        b.err = getattr(s_table, err_name)
        setattr(ret, band_name, b)
        bands.append(b)

    for b in bands:
        b.rchi2 = reduced_chisq(b.data, b.err)

        b.mean = b.data.mean()
        b.rms = b.data.std()
        b.min = b.data.min()
        b.max = b.data.max()
        b.peak_trough = b.max - b.min

        b.mean_err = b.err.mean()

        # Robust quantifiers simply have an "r" at the end of their names
        if rob:
            b.datar = rb.removeoutliers(b.data, 3, niter=2)

            b.meanr = rb.meanr(b.data)
            b.rmsr = rb.stdr(b.data)
            b.minr = b.datar.min()
            b.maxr = b.datar.max()
            b.peak_troughr = b.maxr - b.minr

        # Period finding... is a little dodgy still, and might take forever
        if per:
            b.lsp = lsp(date, b.data, 6., 6.)
            Jmax = lsp_mask(b.lsp[0], b.lsp[1])
            b.lsp_per = 1. / b.lsp[0][Jmax]
            b.lsp_pow = b.lsp[1][Jmax]
            b.fx2_per = 1. / test_analyze(date, b.data, b.err)

    # Finally we'll want to do the whole slope, distance on the JMH graph
    # (until I get the fitting done, we'll have to use hmk and jmh naively)
    ret.color_slope = (ret.jmh.peak_trough / ret.hmk.peak_trough)

    # and the pp_max, using the messy table
    ret.jpp_max = jppcol.max()
    ret.hpp_max = hppcol.max()
    ret.kpp_max = kppcol.max()

    return ret
def make_corrections_table ( constants, table ): ''' Creates a table of photometric corrections per chip per night. Inputs: constants -- an ATpy table which gives 10 constant stars per chip. Columns: "SOURCEID" (13-digit int), "chip" (1-16 int) table -- an ATpy table with time-series photometry Returns: an ATpy table with the following format: THE CORRECTIONS TABLE: night chip correction_J corr_H corr_K '54582.6251067' 3 +0.13 +0.07 -0.03 ''' # rb.meanr(x) is the robust mean # First - let's compute every constant star's robust mean in each band. # And keep track of them. j_meanr = np.zeros(constants.SOURCEID.size) * 1. h_meanr = np.zeros(constants.SOURCEID.size) * 1. k_meanr = np.zeros(constants.SOURCEID.size) * 1. for sid, i in zip(constants.SOURCEID, range(constants.SOURCEID.size)): stable = season_cut(table, sid, 123, flags=0) j_meanr[i] = rb.meanr( stable.JAPERMAG3 ) h_meanr[i] = rb.meanr( stable.HAPERMAG3 ) k_meanr[i] = rb.meanr( stable.KAPERMAG3 ) del stable try: constants.add_column('j_meanr', j_meanr) constants.add_column('h_meanr', h_meanr) constants.add_column('k_meanr', k_meanr) print "computed robust-mean magnitude for each constant star" except: print "looks like you already computed robust-mean magnitudes" # Second - Calculate mean(r) deviations for each chip for each night chip_list = list( set( constants.chip ) ) corrections_list = [] # add tables to this list, join them up at the end for chip in chip_list: local_network = constants.where(constants.chip == chip) # so I'm taking all of the local stars, and for every night... # do i calculate it by each star first, or each night first? # each night first I think would work better. #let's grab a slice of the big table corresponding only to our # favorite sources' photometry. # by joining together the season_cuts from all 10 sources! # no that's stupid. Use an "or |" operator! 
this is gonna be painful cids= local_network.SOURCEID ids = table.SOURCEID #local_table = season_cut( table, local_network[0], 123 ) local_table = data_cut( table, cids, 123, flags=0 ) # aww yeah # local_table = table.where( ( (ids == cids[0]) | # I really, really wish # (ids == cids[1]) | # I knew how to make # (ids == cids[2]) | # this more elegant. # (ids == cids[3]) | # (ids == cids[4]) | # (ids == cids[5]) | # (ids == cids[6]) | # (ids == cids[7]) | # (ids == cids[8]) | # (ids == cids[9]) ) & # (table.JPPERRBITS <= 0) & # (table.HPPERRBITS <= 0) & # (table.KPPERRBITS <= 0) ) # okay, now that i've got the local table... let's get each night's # meanr deviation. # first, let's make some dates to iterate through date_list = list( set( local_table.MEANMJDOBS ) ) # at some point i need to make a structure to save the corrections to ld = len(date_list) date_arr = np.zeros(ld) j_correction = np.zeros(ld) h_correction = np.zeros(ld) k_correction = np.zeros(ld) chip_arr = chip * np.ones(ld, dtype=int) # get each night's correction! for date, j in zip( date_list, range(ld) ): # a temporary place to keep the individual deviations j_deviation = np.zeros_like(cids) * 1. h_deviation = np.zeros_like(cids) * 1. k_deviation = np.zeros_like(cids) * 1. for star, i in zip(cids, range(cids.size) ): star_night_row = local_table.where( (local_table.SOURCEID == star) & (local_table.MEANMJDOBS == date) ) # deviation: the meanr minus that night's magnitude. j_deviation[i] = (constants.j_meanr[constants.SOURCEID==star]- star_night_row.JAPERMAG3 ) h_deviation[i] = (constants.h_meanr[constants.SOURCEID==star]- star_night_row.HAPERMAG3 ) k_deviation[i] = (constants.k_meanr[constants.SOURCEID==star]- star_night_row.KAPERMAG3 ) date_arr[j] = date j_correction[j] = -rb.meanr(j_deviation) h_correction[j] = -rb.meanr(h_deviation) k_correction[j] = -rb.meanr(k_deviation) # make a table for each chip, and (at the end) # add it to the corrections_list. We'll join them up at the end. 
correction_subtable = atpy.Table(name="The Corrections Table") # add_column( 'name', data ) correction_subtable.add_column('date', date_arr) correction_subtable.add_column('chip', chip_arr) correction_subtable.add_column('j_correction', j_correction) correction_subtable.add_column('h_correction', h_correction) correction_subtable.add_column('k_correction', k_correction) corrections_list.append( correction_subtable ) #whoo, finally! correction_table = corrections_list[0] for subtable in corrections_list[1:] : correction_table.append( subtable ) return correction_table '''