def values_vs_lat_dist(var, lats, data, qc_flags, these_flags, filename, multiplier = 100., doBC = False, doBCtotal = False, doBChgt = False, doBCscn = False):
# end
    '''
    Plots showing benefit of QC using all QC flags bar day/night
    This version adds a line for each set of data to show the frequency distribution of all values

    Produces a two-panel scatter of observation value vs latitude ("All data"
    with QC-failed obs overplotted in red; "Clean data" in blue), each with an
    overlaid frequency-distribution curve on a twinned y-axis, then saves to file.

    :param MetVar var: input variable (units and long_name are read for labelling)
    :param array lats: latitudes (stored scaled by 100 - divided by 100. before plotting)
    :param array data: indata (stored scaled by multiplier)
    :param array qc_flags: QC flag array
    :param array these_flags: QC flags to apply
    :param str filename: output filename
    :param float multiplier: multiplier which has been applied to the data already.
    :param bool doBC: work on the bias corrected QC flag definitions
# KATE modified - BC options
    :param bool doBCtotal: work on the full bias corrected QC flag definitions
    :param bool doBChgt: work on the height only bias corrected QC flag definitions
    :param bool doBCscn: work on the screen only bias corrected QC flag definitions
# end
    '''
    # get the final data mask (non-zero where an ob fails any of these_flags)
# KATE modified - BC options
#    data_mask = utils.process_qc_flags(qc_flags, these_flags, doBC = doBC)
    data_mask = utils.process_qc_flags(qc_flags, these_flags, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
# end

    # apportion the mask: clean = obs passing QC, dirty = obs failing QC
    clean_data = np.ma.masked_array(data, data_mask)
    dirty_data = np.ma.masked_array(data, np.logical_not(data_mask))

    # make a 2-panel plot
    plt.clf()

    # all data
    ax1 = plt.subplot(1,2,1)
# KATE modified - lats should only be div 100, not 10 (absolute.multiplier) so hard wired to 100.
    ax1.scatter(data/multiplier, lats/100., c = 'gold', marker = '.') # KATE changed to gold
    ax1.scatter(dirty_data/multiplier, lats/100., c = 'r', marker = '.')
    #plt.scatter(data/multiplier, lats/multiplier, c = 'k', marker = '.')
    #plt.scatter(dirty_data/multiplier, lats/multiplier, c = 'r', marker = '.')
# end

    # Make histograms of the distribution of values and plot
    # What are the xaxis limits? Pad the data range by 10% each side and
    # round outwards to whole numbers.
    xMin = np.min(data/multiplier)
    xMax = np.max(data/multiplier)
    xRange = xMax - xMin
    xMin = np.floor(xMin - (0.1*xRange))
    xMax = np.ceil(xMax + (0.1*xRange))
    # Set up equally spaced histogram bins between min and max range (including end point as a value for 51 bins,
    binsies = np.linspace(xMin,xMax,51) # a range of bins from left most to right most point (51 edges -> 50 bins)

    # Set up the second axes (shares x with ax1, counts on the right-hand y)
    ax2 = ax1.twinx()

    # Plot for all data: line through bin centres (edge + half a bin width)
    ThisHist = np.histogram(data/multiplier,binsies)
    y2Max = np.max(ThisHist[0]) # remembered so both panels share the same count scale
    HalfX = (ThisHist[1][1] - ThisHist[1][0]) / 2.
    ax2.plot(ThisHist[1][0:50]+HalfX,ThisHist[0],c='gold',linestyle='solid',linewidth=4)
    meanHist = '{:5.1f}'.format(np.mean(data/multiplier))
    sdHist = '{:5.1f}'.format(np.std(data/multiplier))
    ax2.annotate('All Obs ('+meanHist+', '+sdHist+')',xy=(0.05,0.96),xycoords='axes fraction',size=12,color='gold')

    # Plot for failed data (unmasked elements of dirty_data only)
    # NOTE(review): np.float was removed in NumPy 1.24 - fine under the
    # Python 2 / older NumPy stack this suite runs on, but would need
    # changing to float on modern NumPy.
    ThisHist = np.histogram(dirty_data[~dirty_data.mask]/multiplier,binsies)
    HalfX = (ThisHist[1][1] - ThisHist[1][0]) / 2.
    ax2.plot(ThisHist[1][0:50]+HalfX,ThisHist[0],c='r',linestyle='solid',linewidth=4)
    meanHist = '{:5.1f}'.format(np.mean(dirty_data[~dirty_data.mask]/multiplier))
    sdHist = '{:5.1f}'.format(np.std(dirty_data[~dirty_data.mask]/multiplier))
    PctFail = '{:5.1f}'.format((len(dirty_data[~dirty_data.mask]) / np.float(len(data))) * 100.)+'%'
    ax2.annotate('Bad Obs ('+meanHist+', '+sdHist+', '+PctFail+')',xy=(0.05,0.92),xycoords='axes fraction',size=12,color='r')

    ax1.set_ylim([-90,90])
    ax1.set_xlim([xMin,xMax])
    ax2.set_ylim([0,y2Max*1.1])
    ax1.set_yticks(np.arange(-90,120,30))
    #ax1.yticks(np.arange(-90,120,30), fontsize = 12)
    ax1.set_title("All data")
    #ax1.title("All data", fontsize = 12)
    ax1.set_xlabel(var.units)
    ax1.set_ylabel("latitude")
    # blank out the count tick labels on the left panel - only the right
    # panel carries the "number of observations" axis labelling
    labels = [item.get_text() for item in ax2.get_yticklabels()]
    empty_string_labels = ['']*len(labels)
    ax2.set_yticklabels(empty_string_labels)
    #ax2.set_ylabel("number of observations")

    #pdb.set_trace()

    # clean data
    ax1 = plt.subplot(1,2,2)
# KATE modified - lats should only be div 100, not 10 (absolute.multiplier) so hard wired to 100.
    ax1.scatter(clean_data/multiplier, lats/100., c = 'b', marker = '.')
    #plt.scatter(clean_data/multiplier, lats/multiplier, c = 'b', marker = '.')
# end

    # Make histograms of the distribution of values and plot
    # xaxis limits already established from above
    # Set up the second axes
    # NOTE(review): plt.twinx() here vs ax1.twinx() in the first panel -
    # both twin the current axes so behaviour looks equivalent, but the
    # explicit ax1.twinx() form would be more consistent; confirm before changing.
    ax2 = plt.twinx()

    # Plot for all data (here: the QC-passing obs only)
    ThisHist = np.histogram(clean_data[~clean_data.mask]/multiplier,binsies)
#    y2Max = np.max(ThisHist[0])
    HalfX = (ThisHist[1][1] - ThisHist[1][0]) / 2.
    ax2.plot(ThisHist[1][0:50]+HalfX,ThisHist[0],c='b',linestyle='solid',linewidth=4)
    meanHist = '{:5.1f}'.format(np.mean(clean_data[~clean_data.mask]/multiplier))
    sdHist = '{:5.1f}'.format(np.std(clean_data[~clean_data.mask]/multiplier))
    ax2.annotate('Clean Obs ('+meanHist+', '+sdHist+')',xy=(0.05,0.96),xycoords='axes fraction',size=12,color='b')

    ax1.set_ylim([-90,90])
    ax1.set_xlim([xMin,xMax])
    ax2.set_ylim([0,y2Max*1.1]) # reuse y2Max from panel 1 so the two count axes match
    ax1.set_yticks(np.arange(-90,120,30))
    #ax1.set_yticks(np.arange(-90,120,30), fontsize = 12)
    # blank the latitude tick labels on the right panel (shared with panel 1)
    labels = [item.get_text() for item in ax1.get_yticklabels()]
    empty_string_labels = ['']*len(labels)
    ax1.set_yticklabels(empty_string_labels)
    ax1.set_title("Clean data")
    #ax1.set_title("Clean data", fontsize = 12)
    ax1.set_xlabel(var.units)
    ax2.set_ylabel("number of observations")

    plt.figtext(0.5, 0.95, var.long_name, ha = 'center', fontsize = 14)

    plt.savefig(filename)

    return # values_vs_lat_dist
def values_vs_lat(var, lats, data, qc_flags, these_flags, filename, multiplier = 100., doBC = False, doBCtotal = False, doBChgt = False, doBCscn = False):
# end
    '''
    Plots showing benefit of QC using all QC flags bar day/night

    Two-panel scatter of observation value vs latitude: left panel shows all
    obs (gold) with QC-failed obs overplotted in red, right panel shows only
    the QC-passing ("clean") obs in blue. The figure is saved to filename.

    :param MetVar var: input variable (units and long_name are read for labelling)
    :param array lats: latitudes (stored scaled by 100 - divided by 100. before plotting)
    :param array data: indata (stored scaled by multiplier)
    :param array qc_flags: QC flag array
    :param array these_flags: QC flags to apply
    :param str filename: output filename
    :param float multiplier: multiplier which has been applied to the data already.
    :param bool doBC: work on the bias corrected QC flag definitions
# KATE modified - BC options
    :param bool doBCtotal: work on the full bias corrected QC flag definitions
    :param bool doBChgt: work on the height only bias corrected QC flag definitions
    :param bool doBCscn: work on the screen only bias corrected QC flag definitions
# end
    '''
    # get the final data mask (non-zero where an ob fails any of these_flags)
# KATE modified - QC options
#    data_mask = utils.process_qc_flags(qc_flags, these_flags, doBC = doBC)
    data_mask = utils.process_qc_flags(qc_flags, these_flags, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
# end

    # apportion the mask: clean = obs passing QC, dirty = obs failing QC
    clean_data = np.ma.masked_array(data, data_mask)
    dirty_data = np.ma.masked_array(data, np.logical_not(data_mask))

    # make a 2-panel plot
    plt.clf()

    # all data
    plt.subplot(1,2,1)
# KATE modified - lats should only be div 100, not 10 (absolute.multiplier) so hard wired to 100.
    plt.scatter(data/multiplier, lats/100., c = 'gold', marker = '.') # KATE changed to gold
    plt.scatter(dirty_data/multiplier, lats/100., c = 'r', marker = '.')
    #plt.scatter(data/multiplier, lats/multiplier, c = 'k', marker = '.')
    #plt.scatter(dirty_data/multiplier, lats/multiplier, c = 'r', marker = '.')
# end
    plt.ylim([-90,90])
    plt.yticks(np.arange(-90,120,30), fontsize = 12)
    plt.title("All data", fontsize = 12)
    plt.xlabel(var.units)
    plt.ylabel("latitude")

    # clean data
    plt.subplot(1,2,2)
# KATE modified - lats should only be div 100, not 10 (absolute.multiplier) so hard wired to 100.
    plt.scatter(clean_data/multiplier, lats/100., c = 'b', marker = '.')
    #plt.scatter(clean_data/multiplier, lats/multiplier, c = 'b', marker = '.')
# end
    plt.ylim([-90,90])
    plt.yticks(np.arange(-90,120,30), fontsize = 12)
    plt.title("Clean data", fontsize = 12)
    plt.xlabel(var.units)

    plt.figtext(0.5, 0.95, var.long_name, ha = 'center', fontsize = 14)

    plt.savefig(filename)

    return # plot_values_vs_lat
def do_gridding(suffix = "relax", start_year = defaults.START_YEAR, end_year = defaults.END_YEAR, start_month = 1, end_month = 12, doQC = False, doQC1it = False, doQC2it = False, doQC3it = False, doSST_SLP = False, doBC = False, doBCtotal = False, doBChgt = False, doBCscn = False, doUncert = False):
#def do_gridding(suffix = "relax", start_year = defaults.START_YEAR, end_year = defaults.END_YEAR, start_month = 1, end_month = 12, doQC = False, doSST_SLP = False, doBC = False, doUncert = False):
# end
    '''
    Do the gridding, first to 3hrly 1x1, then to daily 1x1 and finally monthly 1x1 and 5x5

    For each month in the requested span: read the monthly QC'd ICOADS file,
    optionally mask obs failing QC, grid to 1x1 3-hourly boxes, average those
    to 1x1 dailies (with a minimum-obs filter), and grid the dailies straight
    to 5x5 monthlies which are written to netCDF. Interim 3-hourly and daily
    files are only written when the module-level SwitchOutput flag == 1
    (test mode). Relies on module-level globals DELTA_HOUR, DELTA_LAT,
    DELTA_LON, grid_lats, grid_lons and SwitchOutput defined elsewhere
    in this file. NOTE(review): written for Python 2 (print statements;
    reshape arguments like 24/DELTA_HOUR rely on integer division).

    :param str suffix: "relax" or "strict" criteria
    :param int start_year: start year to process
    :param int end_year: end year to process
    :param int start_month: start month to process
    :param int end_month: end month to process
    :param bool doQC: incorporate the QC flags or not
    :param bool doQC1it: incorporate the first iteration (no buddy) QC flags or not
    :param bool doQC2it: incorporate the second iteration (no buddy) QC flags or not
    :param bool doQC3it: incorporate the third iteration (buddy) QC flags or not
    :param bool doSST_SLP: process additional variables or not
    :param bool doBC: work on the bias corrected data
    :param bool doBCtotal: work on the full bias corrected data
    :param bool doBChgt: work on the height only bias corrected data
    :param bool doBCscn: work on the screen only bias corrected data
    :param bool doUncert: work on files with uncertainty information (not currently used)

    :returns:
    '''
# KATE modified
    settings = set_paths_and_vars.set(doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn, doQC = doQC, doQC1it = doQC1it, doQC2it = doQC2it, doQC3it = doQC3it)
    #settings = set_paths_and_vars.set(doBC = doBC, doQC = doQC)
# end

    # choose the input field delimiters: BC files use the extended format
# KATE modified - added other BC options
#    if doBC:
    if doBC | doBCtotal | doBChgt | doBCscn:
# end
        fields = mds.TheDelimitersExt # extended (BC)
    else:
        fields = mds.TheDelimitersStd # Standard

# KATE modified - added other BC options
#    OBS_ORDER = utils.make_MetVars(settings.mdi, doSST_SLP = doSST_SLP, multiplier = True, doBC = doBC) # ensure that convert from raw format at writing stage with multiplier
    OBS_ORDER = utils.make_MetVars(settings.mdi, doSST_SLP = doSST_SLP, multiplier = True, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn) # ensure that convert from raw format at writing stage with multiplier
# end

    # KW switching between 4 ('_strict') for climatology build and 2 for anomaly buily ('_relax') - added subscripts to files
    # minimum 3-hourly values per day (N_OBS_DAY) and minimum fraction of
    # days per month (N_OBS_FRAC_MONTH) for a grid box to be retained
    if suffix == "relax":
        N_OBS_DAY = 2 # KW ok for anomalies but this was meant to be 4 for dailies_all? and 2 for dailies_night/day?
        N_OBS_FRAC_MONTH = 0.3

    elif suffix == "strict":
        N_OBS_DAY = 4
        N_OBS_FRAC_MONTH = 0.3

    # flags to check on and values to allow through
# KATE modified
    # iterations 1 and 2 have no buddy-check flags, so drop ATbud/DPTbud
    if doQC1it | doQC2it:
        these_flags = {"ATclim":0,"ATrep":0,"DPTclim":0,"DPTssat":0,"DPTrep":0,"DPTrepsat":0}
    else:
        these_flags = {"ATbud":0, "ATclim":0,"ATrep":0,"DPTbud":0,"DPTclim":0,"DPTssat":0,"DPTrep":0,"DPTrepsat":0}
    #these_flags = {"ATbud":0, "ATclim":0,"ATrep":0,"DPTbud":0,"DPTclim":0,"DPTssat":0,"DPTrep":0,"DPTrepsat":0}
# end

    # spin through years and months to read files
    for year in np.arange(start_year, end_year + 1):

        for month in np.arange(start_month, end_month + 1):

            times = utils.TimeVar("time", "time since 1/{}/{} in hours".format(month, year), "hours", "time")

            # 3-hourly time axis spanning the whole month
            grid_hours = np.arange(0, 24 * calendar.monthrange(year, month)[1], DELTA_HOUR)

            times.data = grid_hours

            # process the monthly file
# KATE modified - added other BC options
#            if doBC:
            if doBC | doBCtotal | doBChgt | doBCscn:
# end
                filename = "new_suite_{}{:02d}_{}_extended.txt".format(year, month, settings.OUTROOT)
            else:
                filename = "new_suite_{}{:02d}_{}.txt".format(year, month, settings.OUTROOT)

# KATE modified - added other BC options
#            raw_platform_data, raw_obs, raw_meta, raw_qc = utils.read_qc_data(filename, settings.ICOADS_LOCATION, fields, doBC = doBC)
            raw_platform_data, raw_obs, raw_meta, raw_qc = utils.read_qc_data(filename, settings.ICOADS_LOCATION, fields, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
# end

            # extract observation details
            lats, lons, years, months, days, hours = utils.process_platform_obs(raw_platform_data)

            # test dates *KW - SHOULDN'T NEED THIS - ONLY OBS PASSING DATE CHECK ARE INCLUDED*
            #  *RD* - hasn't run yet but will leave it in just in case of future use.
            if not utils.check_date(years, year, "years", filename):
                sys.exit(1)
            if not utils.check_date(months, month, "months", filename):
                sys.exit(1)

# KATE modified - seems to be an error with missing global name plots so have changed to settings.plots
            # Choose this one to only output once per decade
            #if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
            # Choose this one to output a plot for each month
            if settings.plots:
            #if plots and (year in [1973, 1983, 1993, 2003, 2013]):
# end
                # plot the distribution of hours
                # (hours are stored scaled by 100 at this point, hence the 0-2400 axis)

                import matplotlib.pyplot as plt

                plt.clf()
                plt.hist(hours, np.arange(-100,2500,100))
                plt.ylabel("Number of observations")
                plt.xlabel("Hours")
                plt.xticks(np.arange(-300, 2700, 300))
                plt.savefig(settings.PLOT_LOCATION + "obs_distribution_{}{:02d}_{}.png".format(year, month, suffix))

                # only for a few of the variables
                for variable in OBS_ORDER:
                    if variable.name in ["marine_air_temperature", "dew_point_temperature", "specific_humidity", "relative_humidity", "marine_air_temperature_anomalies", "dew_point_temperature_anomalies", "specific_humidity_anomalies", "relative_humidity_anomalies"]:
                        #plot_qc_diagnostics.values_vs_lat(variable, lats, raw_obs[:, variable.column], raw_qc, these_flags, settings.PLOT_LOCATION + "qc_actuals_{}_{}{:02d}_{}.png".format(variable.name, year, month, suffix), multiplier = variable.multiplier, doBC = doBC)
                        plot_qc_diagnostics.values_vs_lat_dist(variable, lats, raw_obs[:, variable.column], raw_qc, these_flags, \
                                settings.PLOT_LOCATION + "qc_actuals_{}_{}{:02d}_{}.png".format(variable.name, year, month, suffix), multiplier = variable.multiplier, \
# KATE modified - added other BC options
                                doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
# end

            # QC sub-selection
# KATE modified - added QC iterations but also think this needs to include the bias corrected versions because the QC flags need to be applied to those too.
# Not sure what was happening previously with the doBC run - any masking to QC'd obs?
            if doQC | doQC1it | doQC2it | doQC3it | doBC | doBCtotal | doBChgt | doBCscn:
            #if doQC:
# end
                print "Using {} as flags".format(these_flags)
# KATE modified - BC options
#                mask = utils.process_qc_flags(raw_qc, these_flags, doBC = doBC)
                mask = utils.process_qc_flags(raw_qc, these_flags, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
# end
                print "All Obs: ",len(mask)
                print "Good Obs: ",len(mask[np.where(mask == 0)])
                print "Bad Obs: ",len(mask[np.where(mask == 1)])
                #pdb.set_trace()

                # broadcast the per-observation mask across every variable column
                complete_mask = np.zeros(raw_obs.shape)
                for i in range(raw_obs.shape[1]):
                    complete_mask[:,i] = mask
                clean_data = np.ma.masked_array(raw_obs, mask = complete_mask)

# end
            else:
                print "No QC flags selected"
                clean_data = np.ma.masked_array(raw_obs, mask = np.zeros(raw_obs.shape))

            # discretise hours
            hours = utils.make_index(hours, DELTA_HOUR, multiplier = 100)

            # get the hours since start of month
            hours_since = ((days - 1) * 24) + (hours * DELTA_HOUR)

            # discretise lats/lons (stored scaled by 100, hence multiplier = 100)
            lat_index = utils.make_index(lats, DELTA_LAT, multiplier = 100)
            lon_index = utils.make_index(lons, DELTA_LON, multiplier = 100)

            lat_index += ((len(grid_lats)-1)/2) # and as -ve indices are unhelpful, roll by offsetting by most westward
            lon_index += ((len(grid_lons)-1)/2) #    or most southerly so that (0,0) is (-90,-180)

            # NOTE - ALWAYS GIVING TOP-RIGHT OF BOX TO GIVE < HARD LIMIT (as opposed to <=)
            # do the gridding
            # extract the full grid, number of obs, and day/night flag
# KATE MEDIAN WATCH This is hard coded to doMedian (rather than settings.doMedian) - OK WITH MEDIAN HERE!!!
# KATE modified - to add settings.doMedian instead of just doMedian which seems to be consistent with the other bits and BC options
            raw_month_grid, raw_month_n_obs, this_month_period = utils.grid_1by1_cam(clean_data, raw_qc, hours_since, lat_index, lon_index, \
                          grid_hours, grid_lats, grid_lons, OBS_ORDER, settings.mdi, doMedian = settings.doMedian, \
                          doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
            #raw_month_grid, raw_month_n_obs, this_month_period = utils.grid_1by1_cam(clean_data, raw_qc, hours_since, lat_index, lon_index, grid_hours, grid_lats, grid_lons, OBS_ORDER, settings.mdi, doMedian = True, doBC = doBC)
# end
            print "successfully read data into 1x1 3hrly grids"

            # create matching array size (repeat the day/night flag for every variable)
            this_month_period = np.tile(this_month_period, (len(OBS_ORDER),1,1,1))

            for period in ["all", "day", "night"]:

                # mask out the opposite period; "all" keeps everything
                # (this_month_period: presumably 1 = day, 0 = night - TODO confirm against grid_1by1_cam)
                if period == "day":
                    this_month_grid = np.ma.masked_where(this_month_period == 1, raw_month_grid)
                    this_month_obs = np.ma.masked_where(this_month_period[0] == 1, raw_month_n_obs) # and take first slice to re-match the array size
                elif period == "night":
                    this_month_grid = np.ma.masked_where(this_month_period == 0, raw_month_grid)
                    this_month_obs = np.ma.masked_where(this_month_period[0] == 0, raw_month_n_obs) # and take first slice to re-match the array size
                else:
                    this_month_grid = copy.deepcopy(raw_month_grid)
                    this_month_obs = copy.deepcopy(raw_month_n_obs)

# KATE modified
                # If SwitchOutput == 1 then we're in test mode - output interim files!!!
                if (SwitchOutput == 1):
                    # have one month of gridded data.
                    out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_3hr_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
                    utils.netcdf_write(out_filename, this_month_grid, np.zeros(this_month_obs.shape), this_month_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "H")
                ## have one month of gridded data.
                #out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_3hr_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
                #utils.netcdf_write(out_filename, this_month_grid, np.zeros(this_month_obs.shape), this_month_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "H")
# end

                # now average over time
                # Dailies: reshape the time axis into (days, 3-hourly slots per day)
                daily_hours = grid_hours.reshape(-1, 24/DELTA_HOUR)

                shape = this_month_grid.shape
                this_month_grid = this_month_grid.reshape(shape[0], -1, 24/DELTA_HOUR, shape[2], shape[3])
                this_month_obs = this_month_obs.reshape(-1, 24/DELTA_HOUR, shape[2], shape[3])

# KATE MEDIAN WATCH - settings.doMedian is generally set to True - I think we may want the MEAN HERE!!!
# KATE modified - to hard wire in MEAN here
                daily_grid = np.ma.mean(this_month_grid, axis = 2)
                #if settings.doMedian:
                #    daily_grid = np.ma.median(this_month_grid, axis = 2)
                #else:
                #    daily_grid = np.ma.mean(this_month_grid, axis = 2)
# end
                daily_grid.fill_value = settings.mdi

                # filter on number of observations/day
                n_hrs_per_day = np.ma.count(this_month_grid, axis = 2)
                n_obs_per_day = np.ma.sum(this_month_obs, axis = 1)

                # day/night panels cover only half the day, so halve the threshold
                if period == "all":
                    bad_locs = np.where(n_hrs_per_day < N_OBS_DAY) # at least 2 of possible 8 3-hourly values (6hrly data *KW OR AT LEAST 4 3HRLY OBS PRESENT*)
                else:
                    bad_locs = np.where(n_hrs_per_day < np.floor(N_OBS_DAY / 2.)) # at least 1 of possible 8 3-hourly values (6hrly data *KW OR AT LEAST 4 3HRLY OBS PRESENT*)
                daily_grid.mask[bad_locs] = True

# KATE modified - added SwitchOutput to if loop
                if (SwitchOutput == 1) and settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
                #if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
# end
                    # plot the distribution of hours

                    plt.clf()
                    plt.hist(n_hrs_per_day.reshape(-1), bins = np.arange(-1,10), align = "left", log = True, rwidth=0.5)
                    if period == "all":
                        plt.axvline(x = N_OBS_DAY-0.5, color = "r")
                    else:
                        plt.axvline(x = np.floor(N_OBS_DAY / 2.)-0.5, color = "r")
                    plt.title("Number of 1x1-3hrly in each 1x1-daily grid box")
                    plt.xlabel("Number of 3-hrly observations (max = 8)")
                    plt.ylabel("Frequency (log scale)")
                    plt.savefig(settings.PLOT_LOCATION + "n_grids_1x1_daily_{}{:02d}_{}_{}.png".format(year, month, period, suffix))

                    plt.clf()
                    plt.hist(n_obs_per_day.reshape(-1), bins = np.arange(-5,100,5), log = True, rwidth=0.5)
                    plt.title("Total number of raw observations in each 1x1 daily grid box")
                    plt.xlabel("Number of raw observations")
                    plt.ylabel("Frequency (log scale)")
                    plt.savefig(settings.PLOT_LOCATION + "n_obs_1x1_daily_{}{:02d}_{}_{}.png".format(year, month, period, suffix))

                # clear up memory
                del this_month_grid
                del this_month_obs
                gc.collect()

# KATE modified
                # If SwitchOutput == 1 then we're in test mode - output interim files!!!
                if (SwitchOutput == 1):
                    # write dailies file
                    times.data = daily_hours[:,0]
                    out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_daily_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
                    utils.netcdf_write(out_filename, daily_grid, n_hrs_per_day[0], n_obs_per_day, OBS_ORDER, grid_lats, grid_lons, times, frequency = "D")
                #times.data = daily_hours[:,0]
                #out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_daily_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
                #utils.netcdf_write(out_filename, daily_grid, n_hrs_per_day[0], n_obs_per_day, OBS_ORDER, grid_lats, grid_lons, times, frequency = "D")
# end

                # Monthlies
                times.data = daily_hours[0,0]

# KATE modified - commenting out as we don't need this anymore
#                if settings.doMedian:
#                    monthly_grid = np.ma.median(daily_grid, axis = 1)
#                else:
#                    monthly_grid = np.ma.mean(daily_grid, axis = 1)
#
#                monthly_grid.fill_value = settings.mdi
#
#                # filter on number of observations/month
#                n_grids_per_month = np.ma.count(daily_grid, axis = 1)
#                bad_locs = np.where(n_grids_per_month < calendar.monthrange(year, month)[1] * N_OBS_FRAC_MONTH) # 30% of possible daily values
#                monthly_grid.mask[bad_locs] = True
#
#                # number of raw observations
#                n_obs_per_month = np.ma.sum(n_obs_per_day, axis = 0)
#
#                if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
#                    # plot the distribution of days
#
#                    plt.clf()
#                    plt.hist(n_obs_per_month.reshape(-1), bins = np.arange(-10,500,10), log = True, rwidth=0.5)
#                    plt.title("Total number of raw observations in each 1x1 monthly grid box")
#                    plt.xlabel("Number of raw observations")
#                    plt.ylabel("Frequency (log scale)")
#                    plt.savefig(settings.PLOT_LOCATION + "n_obs_1x1_monthly_{}{:02d}_{}_{}.png".format(year, month, period, suffix))
#
#                    plt.clf()
#                    plt.hist(n_grids_per_month[0].reshape(-1), bins = np.arange(-2,40,2), align = "left", log = True, rwidth=0.5)
#                    plt.axvline(x = calendar.monthrange(year, month)[1] * N_OBS_FRAC_MONTH, color="r")
#                    plt.title("Total number of 1x1 daily grids in each 1x1 monthly grid")
#                    plt.xlabel("Number of 1x1 daily grids")
#                    plt.ylabel("Frequency (log scale)")
#                    plt.savefig(settings.PLOT_LOCATION + "n_grids_1x1_monthly_{}{:02d}_{}_{}.png".format(year, month, period, suffix))
#
#                # write monthly 1x1 file
#                out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_monthly_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
#                utils.netcdf_write(out_filename, monthly_grid, n_grids_per_month[0], n_obs_per_month, OBS_ORDER, grid_lats, grid_lons, times, frequency = "M")
#
#                # now to re-grid to coarser resolution
#                # KW # Here we may want to use the mean because its a large area but could be sparsely
#                #      populated with quite different climatologies so we want
#                # the influence of the outliers (we've done our best to ensure these are good values)
#
#                # go from monthly 1x1 to monthly 5x5 - retained as limited overhead
#                monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, grid5_lats, grid5_lons = utils.grid_5by5(monthly_grid, n_obs_per_month, grid_lats, grid_lons, doMedian = settings.doMedian, daily = False)
#                out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
#
#                utils.netcdf_write(out_filename, monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, OBS_ORDER, grid5_lats, grid5_lons, times, frequency = "M")
#
#                if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
#                    # plot the distribution of days
#
#                    plt.clf()
#                    plt.hist(monthly_5by5_n_obs.reshape(-1), bins = np.arange(0,100,5), log = True, rwidth=0.5)
#                    plt.title("Total number of raw observations in each 5x5 monthly grid box")
#                    plt.xlabel("Number of raw observations")
#                    plt.ylabel("Frequency (log scale)")
#                    plt.savefig(settings.PLOT_LOCATION + "n_obs_5x5_monthly_{}{:02d}_{}_{}.png".format(year, month, period, suffix))
#
#                    plt.clf()
#                    plt.hist(monthly_5by5_n_grids.reshape(-1), bins = np.arange(-2,30,2), align = "left", log = True, rwidth=0.5)
#                    plt.axvline(x = 1, color="r")
#                    plt.title("Total number of 1x1 monthly grids in each 5x5 monthly grid")
#                    plt.xlabel("Number of 1x1 monthly grids")
#                    plt.ylabel("Frequency (log scale)")
#                    plt.savefig(settings.PLOT_LOCATION + "n_grids_5x5_monthly_{}{:02d}_{}_{}.png".format(year, month, period, suffix))
#
#                # clear up memory
#                del monthly_grid
#                del monthly_5by5
#                del monthly_5by5_n_grids
#                del monthly_5by5_n_obs
#                del n_grids_per_month
#                del n_obs_per_month
#                del n_hrs_per_day
#                gc.collect()
# end

                # go direct from daily 1x1 to monthly 5x5
# KATE MEDIAN WATCH - settings.doMedian is generally set to True - I think we may want the MEAN HERE!!!
# KATE modified - to hard wire in MEAN here
                monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, grid5_lats, grid5_lons = utils.grid_5by5(daily_grid, n_obs_per_day, grid_lats, grid_lons, doMedian = False, daily = True)
                #monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, grid5_lats, grid5_lons = utils.grid_5by5(daily_grid, n_obs_per_day, grid_lats, grid_lons, doMedian = settings.doMedian, daily = True)
# end
                out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_from_daily_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)

                utils.netcdf_write(out_filename, monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, OBS_ORDER, grid5_lats, grid5_lons, times, frequency = "M")

                if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
                    # plot the distribution of days

                    plt.clf()
                    plt.hist(monthly_5by5_n_obs.reshape(-1), bins = np.arange(-10,1000,10), log = True, rwidth=0.5)
                    plt.title("Total number of raw observations in each 5x5 monthly grid box")
                    plt.xlabel("Number of raw observations")
                    plt.ylabel("Frequency (log scale)")
                    plt.savefig(settings.PLOT_LOCATION + "n_obs_5x5_monthly_from_daily_{}{:02d}_{}_{}.png".format(year, month, period, suffix))

                    plt.clf()
                    plt.hist(monthly_5by5_n_grids.reshape(-1), bins = np.arange(-5,100,5), align = "left", log = True, rwidth=0.5)
                    plt.axvline(x = (0.3 * daily_grid.shape[0]), color="r")
                    plt.title("Total number of 1x1 daily grids in each 5x5 monthly grid")
                    plt.xlabel("Number of 1x1 daily grids")
                    plt.ylabel("Frequency (log scale)")
                    plt.savefig(settings.PLOT_LOCATION + "n_grids_5x5_monthly_from_daily_{}{:02d}_{}_{}.png".format(year, month, period, suffix))

                # clear up memory before the next period/month
                del daily_grid
                del monthly_5by5
                del n_obs_per_day
                del monthly_5by5_n_grids
                del monthly_5by5_n_obs
                gc.collect()

    return # do_gridding