def create_trend_plots(ccd_id, m_list, c_list, fits, acis_prefix, omode, sk=0):
    """
    create a 4-panel trend plot and save it in a png file
    input:  ccd_id      --- a list of ccd ids (strings); one per panel
            m_list      --- a list of lists of bin positions; one per panel
            c_list      --- a list of lists of estimated center positions; one per panel
            fits        --- a fits file name (used in the panel titles)
            acis_prefix --- a prefix of the output directory/file name
            omode       --- a data mode; 'afaint' selects the 'acp.png' tail, anything else 'cp.png'
            sk          --- is this skewed plot? 0: no ('norm_'), otherwise: yes ('skew_')
    output: <plot_dir>Ind_Plots/<acis_prefix>plots/<acis_prefix><norm_|skew_><acp.png|cp.png>
    """
    #
    #--- now create trend plots
    #
    plt.close('all')

    for k in range(0, 4):
        #
        #--- set the plot position (there are 4 trend plots)
        #
        plt.subplot(4, 1, k + 1)

        x = m_list[k]
        y = c_list[k]
        #
        #--- if the fitting fails, draw a flat line at 0 rather than giving up the plot
        #
        try:
            [intc, slope, _] = robust.least_sq(x, y)
        except Exception:
            intc = 0.0
            slope = 0.0

        title = fits + ' ' + ccd_id[k] + ' ' + omode
        plot_trend(x, y, title, intc, slope, sk)

    if omode == 'afaint':
        tail = 'acp.png'
    else:
        tail = 'cp.png'

    if sk == 0:
        skt = 'norm_'
    else:
        skt = 'skew_'
    #
    #--- save the plot; create the output directory without going through a shell
    #
    odir = plot_dir + 'Ind_Plots/' + acis_prefix + 'plots'
    if not os.path.isdir(odir):
        os.makedirs(odir)

    outname = odir + '/' + acis_prefix + skt + tail

    fig = matplotlib.pyplot.gcf()
    fig.set_size_inches(10.0, 10.0)
    plt.savefig(outname, format='png', dpi=200)
def get_slope_and_str(t_array, d_array, start, stop, cind=0):
    """
    compute the fitted slope and the std of the data in the given period
    input:  t_array --- an array of time (must support boolean-mask indexing)
            d_array --- an array of data, parallel to t_array
            start   --- period starting time (inclusive), in the same unit as t_array
            stop    --- period stopping time (exclusive), in the same unit as t_array
            cind    --- 0: fit against convert_to_ydate_list(time);
                        otherwise: fit against convert_to_fyear_list(time)
    output: [slope, std] --- both formatted as '%2.3e' strings;
                             ['nan', 'nan'] if 2 or fewer points fall in the period
    """
    #
    #--- keep only the data inside of [start, stop)
    #
    in_period = (t_array >= start) & (t_array < stop)
    sel_time = t_array[in_period]
    sel_data = d_array[in_period]
    #
    #--- the stats are computed only when there are more than 2 data points
    #
    if len(sel_time) <= 2:
        return ['nan', 'nan']
    #
    #--- choose the time unit for the fitting
    #
    converter = convert_to_ydate_list if cind == 0 else convert_to_fyear_list
    #
    #--- rlf.least_sq returns [<intersect>, <slope>, <err of slope>]
    #
    fit = rlf.least_sq(converter(sel_time), sel_data)
    spread = numpy.std(sel_data)

    return ['%2.3e' % fit[1], '%2.3e' % spread]
def update_stat_table(this_year='', this_mon=''):
    """
    update magnitude related stat table data files
    input:  this_year   --- the last year to process; if '', the current (UTC) year and
                            month are used, stepping back one month during the first
                            4 days of a month
            this_mon    --- the last month to process; overwritten when this_year is ''
            the data are read from <data_dir>:
                mag_i_avg_<#> (1-14)                --- monthly/yearly/recent stats
                acis_<#>, hrc_i_<#>, hrc_s_<#>      --- full range stats
    output: <data_dir>monthly_mag_stats --- monthly stats
            <data_dir>yearly_mag_stats  --- yearly stats
            <data_dir>recent_mag_stats  --- most recent one year
            <data_dir>full_mag_stats    --- entire period
            stats are fitted linear slope and std of the data
    """
    #
    #--- find the current year and month
    #
    if this_year == '':
        out = time.strftime('%Y:%m:%d', time.gmtime())
        atemp = re.split(':', out)
        this_year = int(float(atemp[0]))
        this_mon = int(float(atemp[1]))
        this_day = int(float(atemp[2]))

        if this_day < 5:
            this_mon -= 1
            if this_mon < 1:
                this_mon = 12
                this_year -= 1
    #
    #--- initialize lists for monthly and yearly data saving
    #
    mtime = []
    mslope = []
    mstd = []
    ytime = []
    yslope = []
    ystd = []
    #
    #--- there are 14 entries, named mag_i_avg_1... the first slot won't be used
    #
    for k in range(0, 15):
        yslope.append([])
        ystd.append([])
        mslope.append([])
        mstd.append([])
    #
    #--- initialize for recent one year stats saving
    #
    rslope = ''
    rstd = ''
    #
    #--- go through each data set
    #
    for k in range(1, 15):
        ifile = data_dir + 'mag_i_avg_' + str(k)
        [t_array, d_array] = read_in_data(ifile)
        #
        #--- recent one year
        #
        if len(t_array) > 2:
            r_cut = t_array[-1] - oneyear
            [slope, std] = get_slope_and_str(t_array, d_array, r_cut, t_array[-1])
            rslope = rslope + slope + '\t'
            rstd = rstd + std + '\t'
        else:
            rslope = rslope + 'na\t'
            rstd = rstd + 'na\t'
        #
        #--- yearly
        #
        for year in range(1999, this_year + 1):
            #
            #--- the time labels are shared by all data sets; collect them only once
            #
            if k == 1:
                ytime.append(str(year))

            start = int(Chandra.Time.DateTime(str(year) + ':001:00:00:00').secs)
            stop = int(Chandra.Time.DateTime(str(year + 1) + ':001:00:00:00').secs)

            if len(t_array) > 2:
                [slope, std] = get_slope_and_str(t_array, d_array, start, stop)
            else:
                [slope, std] = ['nan', 'nan']

            yslope[k].append(slope)
            ystd[k].append(std)
        #
        #--- monthly (the data start from 1999:08)
        #
        for year in range(1999, this_year + 1):
            for month in range(1, 13):
                if year == 1999 and month < 8:
                    continue

                if year == this_year and month > this_mon:
                    break

                if k == 1:
                    mtime.append(str(year) + ':' + mcf.add_leading_zero(month))

                nyear = year
                nmonth = month + 1
                if nmonth > 12:
                    nmonth = 1
                    nyear += 1

                start = convert_mday_to_stime(year, month, 1)
                stop = convert_mday_to_stime(nyear, nmonth, 1)

                if len(t_array) > 2:
                    [slope, std] = get_slope_and_str(t_array, d_array, start, stop)
                else:
                    [slope, std] = ['nan', 'nan']

                mslope[k].append(slope)
                mstd[k].append(std)
    #
    #--- now update the data files
    #
    #
    #--- most recent one year
    #
    line = rslope + rstd + '\n'
    rout = data_dir + 'recent_mag_stats'
    with open(rout, 'w') as fo:
        fo.write(line)
    #
    #--- yearly: <year> <14 slopes> <14 stds>
    #
    line = ''
    for k in range(0, len(ytime)):
        line = line + ytime[k] + '\t'
        for m in range(1, 15):
            line = line + yslope[m][k] + '\t'
        for m in range(1, 15):
            line = line + ystd[m][k] + '\t'
        line = line + '\n'

    yout = data_dir + 'yearly_mag_stats'
    with open(yout, 'w') as fo:
        fo.write(line)
    #
    #--- monthly: <yyyy>:<mm> <14 slopes> <14 stds>
    #
    line = ''
    for k in range(0, len(mtime)):
        line = line + mtime[k] + '\t'
        for m in range(1, 15):
            line = line + mslope[m][k] + '\t'
        for m in range(1, 15):
            line = line + mstd[m][k] + '\t'
        line = line + '\n'

    mout = data_dir + 'monthly_mag_stats'
    with open(mout, 'w') as fo:
        fo.write(line)
    #
    #--- full range stats computation uses different data sets which are already averaged on each month
    #--- first create data file list
    #
    dfile_list = []
    for i in range(1, 7):
        dfile_list.append(data_dir + 'acis_' + str(i))

    for i in range(1, 5):
        dfile_list.append(data_dir + 'hrc_i_' + str(i))

    for i in range(1, 5):
        dfile_list.append(data_dir + 'hrc_s_' + str(i))

    slp_line = ''
    std_line = ''
    for ifile in dfile_list:
        [t_array, d_array] = read_in_data(ifile, col=2)
        #
        #--- convert time into fractional year, then convert year 1999 origin
        #
        if len(t_array) > 3:
            t_array = convert_stime_to_fyear_list(t_array)
            t_array = t_array - 1999
            out = rlf.least_sq(t_array, d_array)
            std = numpy.std(d_array)
            slp_line = slp_line + '%2.3e\t' % out[1]
            std_line = std_line + '%2.3e\t' % std
        #
        #--- not enough data: pad BOTH lines so that the columns stay aligned
        #--- (the std line must be padded on its own; assigning it to slp_line
        #--- would overwrite the slopes collected so far)
        #
        else:
            slp_line = slp_line + '-999\t'
            std_line = std_line + '-999\t'

    line = slp_line + std_line + '\n'
    fout = data_dir + 'full_mag_stats'
    with open(fout, 'w') as fo:
        fo.write(line)
def create_monthly_average(fdname, mdname, tyear, tmonth):
    """
    create monthly averaged dataset from the full data set
    input:  fdname  --- a name of full data set
            mdname  --- a name of monthly average data set
            tyear   --- the stopping year of the data collection
            tmonth  --- the stopping month of the data collection
    output: mdname          --- updated monthly averaged data set
                                (<time> <yyyy>:<mm> <mean of each data column>)
            <mdname>_slope  --- updated monthly slope/std data set
                                (<time> <yyyy>:<mm> <slopes> <stds>)
    note:   -999 is written where there is no valid data for the month
    """
    #
    #--- read full dataset
    #
    data = mcf.read_data_file(fdname)
    data_set = mcf.separate_data_to_arrays(data)
    dlen = len(data_set)        #--- # of columns in the full data set
    mlen = dlen + 1             #--- # of columns in the averaged data set
    #
    #--- read the monthly averaged data
    #
    data = mcf.read_data_file(mdname)
    if len(data) > 0:
        mon_data = mcf.separate_data_to_arrays(data)
        #
        #--- find the last entry date and set starting time to the beginning of the month of that time period
        #--- this is because the data of that last entry could be incomplete
        #
        atemp = re.split('\s+', data[-1])
        ltime = float(atemp[0])
        ltime = Chandra.Time.DateTime(ltime).date
        atemp = re.split('\.', ltime)
        ltime = time.strftime('%Y:%m', time.strptime(atemp[0], '%Y:%j:%H:%M:%S'))
        atemp = re.split(':', ltime)
        syear = int(float(atemp[0]))
        smonth = int(float(atemp[1]))
    #
    #--- if this is the first time running this script, start from the empty data set
    #
    else:
        mon_data = []
        for k in range(0, mlen):
            mon_data.append([])
        syear = 1999
        smonth = 8
    #
    #--- some initializations
    #
    tlen = len(data_set[0])     #--- length of the each data set
    save = []                   #--- a list of lists to save mean of each month
    slp_t = []                  #--- a list of time related to slope list
    slp_y = []                  #--- a list of time related to slope list in <yyyy>:<mm>
    slp_s = []                  #--- a list of lists to save the slope of each month
    std_s = []                  #--- a list of lists to save std of each month
    tsave = []                  #--- a list of lists to save time for each value
    sums = []                   #--- a list of lists to save the value for one month
    #
    #--- "save", which keeps the monthly averages, holds the data from the beginning (1999:08),
    #--- but the slope and the std lists hold only the newly computed part
    #
    for k in range(0, mlen):
        save.append([])
        slp_s.append([])
        std_s.append([])

    for k in range(0, dlen):
        sums.append([])
        tsave.append([])

    dpos = 0                    #--- index on the full data set
    ipos = 0                    #--- index on the monthly data set
    test = 0                    #--- 1: the last examined month was closed, 0: it is still open
    #
    #--- before the data collection period starts, use the data from the monthly data set
    #
    for year in range(1999, tyear + 1):
        for month in range(1, 13):
            if year < syear:
                for k in range(0, mlen):
                    #
                    #--- the existing monthly data may be shorter than expected; skip what is missing
                    #
                    try:
                        save[k].append(mon_data[k][ipos])
                    except:
                        pass
                ipos += 1
                continue

            elif (year == syear) and (month < smonth):
                for k in range(0, mlen):
                    try:
                        save[k].append(mon_data[k][ipos])
                    except:
                        pass
                ipos += 1
                continue

            elif (year == tyear) and (month > tmonth):
                break
            #
            #--- data gathering of each month from syear:smonth to tyear:tmonth starts here
            #
            #if (year == syear) and (month == smonth):
            #    ldate = str(year) + ':' + mcf.add_leading_zero(month)
            #    slp_y.append(ldate)
            #    continue
            nyear = year
            nmonth = month + 1
            if nmonth > 12:
                nmonth = 1
                nyear += 1
            #
            #--- set start, mid point, and stop time in seconds from 1998.1.1
            #
            pstart = get_ctime(year, month, 1)
            pmid = get_ctime(year, month, 15)
            pstop = get_ctime(nyear, nmonth, 1)
            #
            #--- go through the main data; dpos remembers where the previous month stopped
            #
            for m in range(dpos, tlen):
                if data_set[0][m] < pstart:
                    continue
                #
                #--- data gathering of the period finished, compute averages
                #
                elif data_set[0][m] > pstop:
                    #
                    #--- for the time use 15th of the month in seconds from 1998.1.1 format
                    #
                    save[0].append(pmid)
                    #
                    #--- second column is <yyyy>:<mm>
                    #
                    ldate = str(year) + ':' + mcf.add_leading_zero(month)
                    save[1].append(ldate)
                    slp_t.append(pmid)
                    slp_y.append(ldate)
                    #
                    #--- now the slot data parts; data column n goes to output column n + 1
                    #--- because the <yyyy>:<mm> column is inserted after the time column
                    #
                    for n in range(1, dlen):
                        #
                        #--- if there are data, make sure that they are not 'nan' before taking a mean
                        #
                        if len(sums[n]) > 0:
                            [t_list, v_list] = clean_up_list(tsave[n], sums[n])
                            if len(t_list) > 0:
                                save[n + 1].append(numpy.mean(v_list))
                                #
                                #--- compute linear fitting on the data for the month and also compute the std of the data
                                #
                                if len(t_list) > 3:
                                    t_list = convert_to_ydate_list(t_list)
                                    [aa, bb, delta] = rlf.least_sq(t_list, v_list)
                                else:
                                    bb = -999
                                slp_s[n + 1].append(bb)
                                std_s[n + 1].append(numpy.std(v_list))
                            #
                            #--- if there is no valid data, use -999
                            #
                            else:
                                save[n + 1].append(-999)
                                slp_s[n + 1].append(-999)
                                std_s[n + 1].append(-999)
                            #
                            #--- initialize the sums/tsave lists for the next round; the current entry
                            #--- (row m) already belongs to the following period, so seed with it
                            #
                            try:
                                val = float(data_set[n][m])
                                sums[n] = [val]
                                tsave[n] = [data_set[0][m]]
                            except:
                                sums[n] = []
                                tsave[n] = []
                            continue
                        #
                        #--- no data during the period; just put -999
                        #
                        else:
                            save[n + 1].append(-999)
                            slp_s[n + 1].append(-999)
                            std_s[n + 1].append(-999)
                            sums[n] = []
                            tsave[n] = []
                            try:
                                val = float(data_set[n][m])
                                sums[n] = [val]
                                tsave[n] = [data_set[0][m]]
                            except:
                                sums[n] = []
                                tsave[n] = []
                            continue
                    #
                    #--- a few other initialization for the next round
                    #
                    sums[0] = [data_set[0][m]]
                    tsave[0] = [data_set[0][m]]
                    dpos = m + 1            #--- where to start reading the big data
                    lyear = year            #--- keep year and month for the later use
                    lmonth = month
                    test = 1
                    break
                #
                #--- the time is between the period. accumulate the data
                #
                else:
                    for n in range(1, dlen):
                        try:
                            val = data_set[n][m]
                            if str(val).lower() in ['nan', 'na']:
                                continue
                            val = float(val)
                            #
                            #--- make sure that the value is reasonable
                            #
                            if val < 100 and val > -100:
                                sums[n].append(val)
                                tsave[n].append(data_set[0][m])
                        except:
                            continue
                    lyear = year
                    lmonth = month
                    test = 0
    #
    #--- the last part may not be a complete month data; if any values were accumulated
    #--- but the month was never closed (test == 0), write them out as the last entry
    #--- NOTE(review): pmid here is the one set in the last month iteration that reached
    #--- the data scan; presumably that is the same month as lyear:lmonth --- confirm
    #
    if test == 0:
        for n in range(1, dlen):
            test += len(sums[n])

        if test > 0:
            save[0].append(pmid)
            ldate = str(lyear) + ':' + mcf.add_leading_zero(lmonth)
            save[1].append(ldate)
            slp_t.append(pmid)
            slp_y.append(ldate)

            for n in range(1, dlen):
                if len(sums[n]) > 0:
                    [t_list, v_list] = clean_up_list(tsave[n], sums[n])
                    if len(t_list) > 0:
                        save[n + 1].append(numpy.mean(v_list))
                        if len(t_list) > 3:
                            t_list = convert_to_ydate_list(t_list)
                            [aa, bb, delta] = rlf.least_sq(t_list, v_list)
                        else:
                            bb = -999.0
                        slp_s[n + 1].append(bb)
                        std_s[n + 1].append(numpy.std(v_list))
                    else:
                        save[n + 1].append(-999)
                        slp_s[n + 1].append(-999)
                        std_s[n + 1].append(-999)
                else:
                    save[n + 1].append(-999)
                    slp_s[n + 1].append(-999)
                    std_s[n + 1].append(-999)
    #
    #--- update the mean monthly data set
    #
    line = ''
    for k in range(0, len(save[0])):
        #
        #--- prevent a duplicated line (same time stamp as the previous entry)
        #
        if k != 0:
            if save[0][k] == save[0][k - 1]:
                continue

        line = line + '%d\t' % save[0][k]
        line = line + save[1][k] + '\t'
        for n in range(2, mlen):
            line = line + format_line(save[n][k])
        line = line + '\n'

    with open(mdname, 'w') as fo:
        fo.write(line)
    #
    #--- update the slope/std monthly data set
    #
    line = ''
    for k in range(0, len(slp_t)):
        line = line + '%d\t' % slp_t[k]
        line = line + slp_y[k] + '\t'
        for n in range(2, mlen):
            line = line + format_line(slp_s[n][k])
        for n in range(2, mlen):
            line = line + format_line(std_s[n][k])
        line = line + '\n'
    #
    #--- since the slope data are saved only the new part, read the old data
    #--- and keep the old lines up to (but not including) the first newly computed month
    #
    sdname = mdname + '_slope'
    data = mcf.read_data_file(sdname)
    #
    #--- any failure here (e.g., nothing newly computed, or an unreadable old line)
    #--- drops the old content and only the new part is written
    #
    try:
        btemp = re.split(':', slp_y[0])
        syear = int(float(btemp[0]))
        smon = int(float(btemp[1]))
        aline = ''
        for ent in data:
            atemp = re.split('\s+', ent)
            btemp = re.split(':', atemp[1])
            cyear = int(float(btemp[0]))
            cmon = int(float(btemp[1]))
            if cyear == syear and cmon == smon:
                break
            else:
                aline = aline + ent + '\n'
    except:
        aline = ''

    aline = aline + line

    with open(sdname, 'w') as fo:
        fo.write(aline)
def update_slot_stat_table(this_year='', this_mon=''):
    """
    update slot related stat table data files
    input:  this_year   --- the last year to process; if '', the current (UTC) year is used
                            (adjusted as if it were the previous month during the first
                            4 days of a month)
            this_mon    --- overwritten when this_year is ''; not used otherwise
            the data are read from <data_dir>:
                pos_err_mtatr, diff_mtatr, acacent_mtatr        --- yearly/recent stats
                <name>_mtatr_month                              --- full range stats
    output: <data_dir><name>_mtatr_year_slope   --- yearly stats
            <data_dir><name>_mtatr_recent_slope --- most recent one year
            <data_dir><name>_mtatr_full_slope   --- entire period
            where <name> is one of pos_err, diff, acacent, acacent2
            stats are fitted linear slope and std of the data
    """
    #
    #--- find the current year and month
    #
    if this_year == '':
        today = time.strftime('%Y:%m:%d', time.gmtime())
        ypart, mpart, dpart = re.split(':', today)
        this_year = int(float(ypart))
        this_mon = int(float(mpart))

        if int(float(dpart)) < 5:
            this_mon -= 1
            if this_mon < 1:
                this_mon = 12
                this_year -= 1

    heads = ['pos_err', 'diff', 'acacent', 'acacent2']
    slots = range(0, 8)                     #--- there are 8 slots
    years = range(1999, this_year + 1)
    #
    #--- recent one year and yearly stats; 'acacent2' reads the same file as 'acacent'
    #--- but from a different set of columns
    #
    for head in heads:
        mtatr_file = data_dir + head.replace('2', '') + '_mtatr'
        #
        #--- NOTE(review): with an offset of 8, the first 'acacent2' column (8) is the same
        #--- as the last 'acacent' column (7 + 1) --- confirm against the file layout
        #
        col_offset = 8 if head == 'acacent2' else 1

        recent_slopes = []
        recent_stds = []
        year_slopes = []                    #--- one list of yearly slopes per slot
        year_stds = []                      #--- one list of yearly stds per slot

        for slot in slots:
            [t_array, d_array] = read_in_data(mtatr_file, col=slot + col_offset)
            enough = len(t_array) > 3
            #
            #--- recent one year
            #
            if enough:
                r_cut = t_array[-1] - oneyear
                [r_slope, r_std] = get_slope_and_str(t_array, d_array, r_cut, t_array[-1])
            else:
                r_slope = 'na'
                r_std = 'na'
            recent_slopes.append(r_slope + '\t')
            recent_stds.append(r_std + '\t')
            #
            #--- yearly
            #
            slot_slopes = []
            slot_stds = []
            for year in years:
                start = int(Chandra.Time.DateTime(str(year) + ':001:00:00:00').secs)
                stop = int(Chandra.Time.DateTime(str(year + 1) + ':001:00:00:00').secs)

                if enough:
                    [y_slope, y_std] = get_slope_and_str(t_array, d_array, start, stop)
                else:
                    y_slope = 'nan'
                    y_std = 'nan'
                slot_slopes.append(y_slope)
                slot_stds.append(y_std)

            year_slopes.append(slot_slopes)
            year_stds.append(slot_stds)
        #
        #--- most recent one year: <8 slopes> <8 stds>
        #
        recent_out = data_dir + head + '_mtatr_recent_slope'
        with open(recent_out, 'w') as fo:
            fo.write(''.join(recent_slopes) + ''.join(recent_stds) + '\n')
        #
        #--- yearly: <year> <8 slopes> <8 stds>; every field is followed by a tab
        #
        rows = []
        for idx, year in enumerate(years):
            cells = [str(year)]
            cells.extend(per_slot[idx] for per_slot in year_slopes)
            cells.extend(per_slot[idx] for per_slot in year_stds)
            rows.append('\t'.join(cells) + '\t\n')

        year_out = data_dir + head + '_mtatr_year_slope'
        with open(year_out, 'w') as fo:
            fo.write(''.join(rows))
    #
    #--- full range stats computation uses different data sets which are already averaged on each month
    #--- NOTE(review): 'acacent' and 'acacent2' read the same file and the same columns here,
    #--- so their full range outputs are identical --- confirm whether that is intended
    #
    for head in heads:
        month_file = data_dir + head.replace('2', '') + '_mtatr_month'

        slope_cells = []
        std_cells = []
        for slot in slots:
            [t_array, d_array] = read_in_data(month_file, col=slot + 2)

            if len(t_array) > 3:
                fit = rlf.least_sq(convert_to_fyear_list(t_array), d_array)
                spread = numpy.std(d_array)
                slope_cells.append('%2.3e\t' % fit[1])
                std_cells.append('%2.3e\t' % spread)
            else:
                slope_cells.append('na\t')
                std_cells.append('na\t')

        full_out = data_dir + head + '_mtatr_full_slope'
        with open(full_out, 'w') as fo:
            fo.write(''.join(slope_cells) + ''.join(std_cells) + '\n')
def save_data(stime, x, ny, nw, sy, sw, sk, ccd_id, obsid, omode):
    """
    append one line of fitting results to a data file
    input:  stime   --- the time of the data collected
            x       --- a list of bins
            ny      --- a list of center position
            nw      --- a list of width
            sy      --- a list of skewed center position
            sw      --- a list of skewed width
            sk      --- a list of skewness
            ccd_id  --- ccd ID
            obsid   --- obsid
            omode   --- data mode
    output: <data_dir><ccd_id>_<omode>.dat
            columns: time, obsid, slope, then mean/std of center, width, skewed center,
            skewed width (extreme values removed first), and mean skewness
    """
    #
    #--- estimate a slope of the fitted line; if the fitting fails, use 0.0
    #
    try:
        [_, slope, _] = robust.least_sq(x, ny)
    except Exception:
        slope = 0.0
    #
    #--- remove the extreme cases and then compute mean and std
    #--- order: normal center, normal width, skewed center, skewed width
    #
    stats = []
    for values in (ny, nw, sy, sw):
        cleaned = remove_extreme(values)
        stats.append(numpy.mean(cleaned))
        stats.append(numpy.std(cleaned))
    #
    #--- skewness: extreme values are not removed, and only the mean is kept
    #
    stats.append(numpy.mean(sk))

    sline = '%2.7e\t' % stime
    sline = sline + str(obsid) + '\t'
    #
    #--- the slope is written only when it is a valid number
    #
    try:
        float(slope)
        sline = sline + '%2.6e\t' % slope
    except (TypeError, ValueError):
        sline = sline + 'NA\t'

    sline = sline + '\t'.join('%2.6f' % val for val in stats) + '\n'
    #
    #--- save in a file
    #
    outfile = data_dir + ccd_id + '_' + omode + '.dat'
    #
    #--- if this is the first time, add the header
    #
    header = ''
    if not os.path.isfile(outfile):
        header = '#' + '-' * 144 + '\n'
        header = header + '#time\t\t\tobsid\tslope\t\t\tcent mean\tcent std\t'
        header = header + 'width mean\twidth std\tskew cent\tskew c std\t'
        header = header + 'skew width\tskew w std\tskewness\n' + '#'
        header = header + '-' * 144 + '\n'
    #
    #--- append mode creates the file when it does not exist yet
    #
    with open(outfile, 'a') as fo:
        fo.write(header + sline)