# KATE modified
def apply_climatology(suffix = "relax", period = "both", daily = False, doQC = False, doQC1it = False, doQC2it = False, doQC3it = False, doBC = False, doBCtotal = False, doBChgt = False, doBCscn = False):
#def apply_climatology(suffix = "relax", period = "both", daily = False, doQC = False, doBC = False):
# end
    '''
    Apply monthly 5x5 climatology

    :param str suffix: "relax" or "strict" criteria
    :param str period: which period to do day/night/both?
    :param bool daily: run in 1x1 daily --> 5x5 monthly data
    :param bool doQC: incorporate the QC flags or not
# KATE modified
    :param bool doQC1it: incorporate the 1st iteration QC flags or not
    :param bool doQC2it: incorporate the 2nd iteration QC flags or not
    :param bool doQC3it: incorporate the 3rd iteration QC flags or not
# end
    :param bool doBC: work on the bias corrected data
# KATE modified
    :param bool doBCtotal: work on the full bias corrected data
    :param bool doBChgt: work on the height only bias corrected data
    :param bool doBCscn: work on the screen only bias corrected data
# end

    :returns:
    '''
# KATE modified
    settings = set_paths_and_vars.set(doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn, doQC = doQC, doQC1it = doQC1it, doQC2it = doQC2it, doQC3it = doQC3it)
    #settings = set_paths_and_vars.set(doBC = doBC, doQC = doQC)
# end

    if suffix == "relax":
        N_YEARS_PRESENT = 10 # number of years present to calculate climatology
    elif suffix == "strict":
        N_YEARS_PRESENT = 15 # number of years present to calculate climatology

    print "Do daily: {}".format(daily)

    # set filenames
    if daily:
        climfilename = settings.DATA_LOCATION + "{}_5x5_monthly_climatology_from_daily_{}_{}.nc".format(settings.OUTROOT, period, suffix)
        obsfilename = settings.DATA_LOCATION + "{}_5x5_monthly_from_daily_{}_{}.nc".format(settings.OUTROOT, period, suffix)
    else:
        climfilename = settings.DATA_LOCATION + "{}_5x5_monthly_climatology_{}_{}.nc".format(settings.OUTROOT, period, suffix)
        obsfilename = settings.DATA_LOCATION + "{}_5x5_monthly_from_daily_{}_{}.nc".format(settings.OUTROOT, period, suffix)

    # load netCDF files
    clim_file = ncdf.Dataset(climfilename, 'r', format = 'NETCDF4')
    obs_file = ncdf.Dataset(obsfilename, 'r', format = 'NETCDF4')

    # simple - use a list and append
    all_anoms = []

    # spin through all variables
    for v, var in enumerate(OBS_ORDER):
        print var.name

        obs = obs_file.variables[var.name][:]
        clims = clim_file.variables[var.name][:]

        anomalies = obs - np.tile(clims, (obs.shape[0] // 12, 1, 1)) # make to same shape (integer repeat count)

        all_anoms += [anomalies]

    # finished - convert list to array
    all_anoms = np.ma.array(all_anoms)

    # extract remaining information to copy across
    n_obs = obs_file.variables["n_obs"][:]
    n_grids = obs_file.variables["n_grids"][:]

    # set up the time object and axis
    intimes = obs_file.variables["time"]
    times = utils.TimeVar("time", intimes.long_name, intimes.units, intimes.standard_name)
    times.data = intimes[:]

    # write file
    if daily:
# KATE modified - added renorm19812010 to the filename
        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_renorm19812010_anomalies_from_daily_{}_{}.nc".format(period, suffix)
        #out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_anomalies_from_daily_{}_{}.nc".format(period, suffix)
# end
    else:
# KATE modified - added renorm19812010 to the filename
        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_renorm19812010_anomalies_{}_{}.nc".format(period, suffix)
        #out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_anomalies_{}_{}.nc".format(period, suffix)
# end

# KATE modified - only outputting 90 to -90 now and have changed grid_lats above
    utils.netcdf_write(out_filename, all_anoms, n_grids, n_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "Y")
    #if period == "both":
    #    utils.netcdf_write(out_filename, all_anoms, n_grids, n_obs, OBS_ORDER, grid_lats[::-1], grid_lons, times, frequency = "Y")
    #else:
    #    utils.netcdf_write(out_filename, all_anoms, n_grids, n_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "Y")
# end

    return # apply_climatology
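# A minimal, self-contained sketch (illustrative only, not called by the
# processing chain) of how apply_climatology() forms anomalies: the
# (12, nlat, nlon) climatology is tiled along the time axis to match the
# (n_years * 12, nlat, nlon) monthly fields and subtracted.  Array sizes here
# are toy values, not the real grid.
def _sketch_apply_climatology():
    import numpy as np
    n_years, nlat, nlon = 3, 36, 72               # toy dimensions
    obs = np.ma.masked_invalid(np.random.rand(n_years * 12, nlat, nlon))
    clims = np.random.rand(12, nlat, nlon)        # one field per calendar month
    # tile the annual cycle so it lines up month-for-month with the obs
    anomalies = obs - np.tile(clims, (obs.shape[0] // 12, 1, 1))
    assert anomalies.shape == obs.shape
    return anomalies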
# KATE modified
def calculate_climatology(suffix = "relax", start_year = 1981, end_year = 2010, period = "both", daily = False, doQC = False, doQC1it = False, doQC2it = False, doQC3it = False, doBC = False, doBCtotal = False, doBChgt = False, doBCscn = False):
#def calculate_climatology(suffix = "relax", start_year = 1981, end_year = 2010, period = "both", daily = False, doQC = False, doBC = False):
# end
    '''
    Make 5x5 monthly climatology

    :param str suffix: "relax" or "strict" criteria
    :param int start_year: start year to process
    :param int end_year: end year to process
    :param str period: which period to do day/night/both?
    :param bool daily: run in 1x1 daily --> 5x5 monthly data
    :param bool doQC: incorporate the QC flags or not
# KATE modified
    :param bool doQC1it: incorporate the 1st iteration QC flags or not
    :param bool doQC2it: incorporate the 2nd iteration QC flags or not
    :param bool doQC3it: incorporate the 3rd iteration QC flags or not
# end
    :param bool doBC: work on the bias corrected data
# KATE modified
    :param bool doBCtotal: work on the full bias corrected data
    :param bool doBChgt: work on the height only bias corrected data
    :param bool doBCscn: work on the screen only bias corrected data
# end

    :returns:
    '''
# KATE modified
    settings = set_paths_and_vars.set(doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn, doQC = doQC, doQC1it = doQC1it, doQC2it = doQC2it, doQC3it = doQC3it)
    #settings = set_paths_and_vars.set(doBC = doBC, doQC = doQC)
# end

    if suffix == "relax":
        N_YEARS_PRESENT = 10 # number of years present to calculate climatology
    elif suffix == "strict":
        N_YEARS_PRESENT = 15 # number of years present to calculate climatology

    print "Do daily: {}".format(daily)

    N_YEARS = end_year - start_year + 1

    # read in each variable - memory issues
    all_clims = np.ma.zeros([len(OBS_ORDER), 12, len(grid_lats), len(grid_lons)])
    # KW - why set up as np.ones?
    all_clims.mask = np.zeros([len(OBS_ORDER), 12, len(grid_lats), len(grid_lons)])

    all_stds = np.ma.zeros([len(OBS_ORDER), 12, len(grid_lats), len(grid_lons)])
    all_stds.mask = np.zeros([len(OBS_ORDER), 12, len(grid_lats), len(grid_lons)])

    # KW no mask??? I've set one with fill_value as -1 - should the mask be .zeros or .ones though?
    all_n_obs = np.ma.zeros([N_YEARS * 12, len(grid_lats), len(grid_lons)])
    all_n_obs.mask = np.zeros([N_YEARS * 12, len(grid_lats), len(grid_lons)])
    all_n_obs.fill_value = -1

    if daily:
        filename = settings.DATA_LOCATION + "{}_5x5_monthly_from_daily_{}_{}.nc".format(settings.OUTROOT, period, suffix)
    else:
        filename = settings.DATA_LOCATION + "{}_5x5_monthly_{}_{}.nc".format(settings.OUTROOT, period, suffix)

    ncdf_file = ncdf.Dataset(filename, 'r', format = 'NETCDF4')

    times = ncdf_file.variables["time"]
    data_start = int(times.long_name.split(" ")[2].split("/")[-1])
    clim_offset = (start_year - data_start) * 12

    for v, var in enumerate(OBS_ORDER):
        print var.name

        # set up empty data array
        all_months = np.ma.zeros([N_YEARS * 12, len(grid_lats), len(grid_lons)])
        # sets up a mask of 'False' = not masked!
        all_months.mask = np.zeros([N_YEARS * 12, len(grid_lats), len(grid_lons)])
        all_months.fill_value = settings.mdi

        all_months[:, :, :] = ncdf_file.variables[var.name][clim_offset:clim_offset + (N_YEARS * 12)]

        # months x lats x lons
        shape = all_months.shape
        all_months = all_months.reshape(-1, 12, shape[-2], shape[-1])

        n_grids = np.ma.count(all_months, axis = 0)

        # collapse down the years
# KATE MEDIAN WATCH
# KATE modified - forced to use MEAN
        all_clims[v, :, :, :] = np.ma.mean(all_months, axis = 0)
        #if settings.doMedian:
        #    all_clims[v, :, :, :] = utils.bn_median(all_months, axis = 0)
        #else:
        #    all_clims[v, :, :, :] = np.ma.mean(all_months, axis = 0)
# end
        all_stds[v, :, :, :] = np.ma.std(all_months, axis = 0)

        # mask where fewer than 50% of years have data
        locs = np.ma.where(n_grids < N_YEARS_PRESENT)
        all_clims[v, :, :, :].mask[locs] = True
        # KW should probably mask stdev too - although unmasked it does show the potential coverage
        all_stds[v, :, :, :].mask[locs] = True

        if settings.plots and v == 0:
            import matplotlib.pyplot as plt
            plt.clf()
            plt.hist(n_grids.reshape(-1), bins = np.arange(-1, 32), align = "left", log = True, rwidth = 0.5)
            plt.axvline(x = N_YEARS_PRESENT - 0.5, color = "r")
            plt.title("Number of years present in each month")
            plt.xlabel("Number of years (max = 30)")
            plt.ylabel("Frequency (log scale)")
            plt.savefig(settings.PLOT_LOCATION + "monthly_5x5_clims_n_years_{}_{}.png".format(period, suffix))

    # now process number of observations
    # (KW all_n_obs wasn't a masked array - so have set it up as one - BUT not really convinced this
    # is working as it should.  No import numpy.ma?)
    all_n_obs[:, :, :] = ncdf_file.variables["n_obs"][clim_offset:clim_offset + (N_YEARS * 12)]
    all_n_obs = all_n_obs.reshape(-1, 12, shape[-2], shape[-1])
    all_obs = np.ma.sum(all_n_obs, axis = 0)

    # set up time array
    times = utils.TimeVar("time", "time since 1/1/{} in days".format(1), "days", "time")
    month_lengths = [calendar.monthrange(1, x + 1)[1] for x in range(12)]
    times.data = [sum(month_lengths[0:x]) for x in range(12)]

    # write files
    if daily:
        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_climatology_from_daily_{}_{}.nc".format(period, suffix)
    else:
        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_climatology_{}_{}.nc".format(period, suffix)

# KATE modified - only outputting 90 to -90 now and have changed grid_lats above
    utils.netcdf_write(out_filename, all_clims, n_grids, all_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "Y")
    #if period == "both":
    #    utils.netcdf_write(out_filename, all_clims, n_grids, all_obs, OBS_ORDER, grid_lats[::-1], grid_lons, times, frequency = "Y")
    #else:
    #    utils.netcdf_write(out_filename, all_clims, n_grids, all_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "Y")
# end

    if daily:
        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_stdev_from_daily_{}_{}.nc".format(period, suffix)
    else:
        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_stdev_{}_{}.nc".format(period, suffix)

# KATE modified - only outputting 90 to -90 now and have changed grid_lats above
    utils.netcdf_write(out_filename, all_stds, n_grids, all_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "Y")
    #if period == "both":
    #    utils.netcdf_write(out_filename, all_stds, n_grids, all_obs, OBS_ORDER, grid_lats[::-1], grid_lons, times, frequency = "Y")
    #else:
    #    utils.netcdf_write(out_filename, all_stds, n_grids, all_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "Y")
# end

    # test distribution of obs with grid boxes
    if daily:
        outfile = file(settings.OUTROOT + "_5x5_monthly_climatology_from_daily_{}_{}.txt".format(period, suffix), "w")
    else:
        outfile = file(settings.OUTROOT + "_5x5_monthly_climatology_{}_{}.txt".format(period, suffix), "w")

    utils.boxes_with_n_obs(outfile, all_obs, all_clims[0], N_YEARS_PRESENT)

    return # calculate_climatology
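# Sketch (toy sizes, synthetic gaps; illustrative only) of the coverage test
# used in calculate_climatology(): reshape the monthly series to
# (years, 12, lat, lon), count unmasked years per calendar month with
# np.ma.count, then mask the climatology wherever fewer than N_YEARS_PRESENT
# years contribute.
def _sketch_coverage_mask(n_years_present = 10):
    import numpy as np
    n_years, nlat, nlon = 30, 4, 8
    monthly = np.ma.masked_invalid(np.random.rand(n_years * 12, nlat, nlon))
    monthly[::3] = np.ma.masked                    # synthetic data gaps
    by_year = monthly.reshape(-1, 12, nlat, nlon)  # years x months x lat x lon
    n_grids = np.ma.count(by_year, axis = 0)       # years present per month/box
    clim = np.ma.masked_where(n_grids < n_years_present, np.ma.mean(by_year, axis = 0))
    return clim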
import os
import datetime as dt
import numpy as np
import sys
import argparse
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
import calendar
import netCDF4 as ncdf
import copy
import gc

import utils
import set_paths_and_vars
defaults = set_paths_and_vars.set()

OBS_ORDER = utils.make_MetVars(defaults.mdi, multiplier = False)

# what size grid (lat/lon)
DELTA_LAT = 5
DELTA_LON = 5

# set up the grid
# KATE modified - flipped the lats to go 90 to -90
grid_lats = np.arange(90 - DELTA_LAT, -90 - DELTA_LAT, -DELTA_LAT)
#grid_lats = np.arange(-90 + DELTA_LAT, 90 + DELTA_LAT, DELTA_LAT)
# end
grid_lons = np.arange(-180 + DELTA_LON, 180 + DELTA_LON, DELTA_LON)
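# Illustrative check (not used by the processing, safe to delete) of what the
# grid definition above produces: grid_lats runs 85 down to -90 (36 values,
# descending) and grid_lons runs -175 to 180 (72 values), consistent with the
# "top-right of box" convention noted in do_gridding() below.
def _sketch_grid_check():
    import numpy as np
    lats = np.arange(90 - 5, -90 - 5, -5)
    lons = np.arange(-180 + 5, 180 + 5, 5)
    assert len(lats) == 36 and lats[0] == 85 and lats[-1] == -90
    assert len(lons) == 72 and lons[0] == -175 and lons[-1] == 180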
# KATE modified
def make_timeseries(
    suffix="relax",
    doQC=False,
    doQC1it=False,
    doQC2it=False,
    doQC3it=False,
    doBC=False,
    doBCtotal=False,
    doBChgt=False,
    doBCscn=False,
):
# def make_timeseries(suffix = "relax", doQC = False, doBC = False):
# end
    """
    Make the timeseries - plots and netCDF files

    :param str suffix: "relax" or "strict" criteria
    :param bool doQC: incorporate the QC flags or not
    # KATE modified
    :param bool doQC1it: incorporate the first iteration QC flags or not
    :param bool doQC2it: incorporate the second iteration QC flags or not
    :param bool doQC3it: incorporate the third iteration QC flags or not
    # end
    :param bool doBC: work on the bias corrected data
    # KATE modified
    :param bool doBCtotal: work on the full bias corrected data
    :param bool doBChgt: work on the height only bias corrected data
    :param bool doBCscn: work on the screen only bias corrected data
    # end

    :returns:
    """
    # KATE modified
    settings = set_paths_and_vars.set(
        doBC=doBC,
        doBCtotal=doBCtotal,
        doBChgt=doBChgt,
        doBCscn=doBCscn,
        doQC=doQC,
        doQC1it=doQC1it,
        doQC2it=doQC2it,
        doQC3it=doQC3it,
    )
    # settings = set_paths_and_vars.set(doBC = doBC, doQC = doQC)
    # end

    print "Do QC = {}".format(doQC)
    # KATE modified
    print "Do QC1it = {}".format(doQC1it)
    print "Do QC2it = {}".format(doQC2it)
    print "Do QC3it = {}".format(doQC3it)
    # end
    print "Do BC = {}".format(doBC)
    # KATE modified
    print "Do BCtotal = {}".format(doBCtotal)
    print "Do BChgt = {}".format(doBChgt)
    print "Do BCscn = {}".format(doBCscn)
    # end

    # monthly -> annual
    watermarkstring = (
        "/".join(os.getcwd().split("/")[4:])
        + "/"
        + os.path.basename(__file__)
        + " "
        + dt.datetime.strftime(dt.datetime.now(), "%d-%b-%Y %H:%M")
    )

    # run on the actuals (which include anomalies from ERA) and the anomalies
    # (calculated from obs-actuals, but also include the anomalies from ERA)
    # KATE modified to add new file name bit '_renorm19812010'
    for version in ["", "_renorm19812010_anomalies"]:
        # for version in ["", "_anomalies"]:
        # end

        if version == "":
            print "5x5 monthly Standard"
        elif version == "_renorm19812010_anomalies":
            print "5x5 monthly Anomalies"

        for period in ["both", "day", "night"]:
            print period

            filename = "{}/{}_5x5_monthly{}_from_daily_{}_{}.nc".format(
                settings.DATA_LOCATION, settings.OUTROOT, version, period, suffix
            )
            print filename
            ncdf_file = ncdf.Dataset(filename, "r", format="NETCDF4")

            lat_centres = ncdf_file.variables["latitude"]
            lon_centres = ncdf_file.variables["longitude"]

            n_obs = utils.set_MetVar_attributes(
                "n_obs", "Number of Observations", "Number of Observations", 1, -1, np.dtype("int64"), 0
            )
            OBS_ORDER = utils.make_MetVars(settings.mdi, multiplier=False)
            OBS_ORDER += [n_obs]

            for v, var in enumerate(OBS_ORDER):
                print var.name

                var.data = ncdf_file.variables[var.name][:]

                # make annual and monthly timeseries
                mesh_lon, mesh_lat = np.meshgrid(lon_centres, lat_centres)
                cosines = np.cos(np.radians(mesh_lat))

                full_cosines = mask_and_normalise_weights(cosines, var.data)
                # masked weights now sum to one for each field

                if var.name == "n_obs":
                    weighted_data = var.data
                else:
                    weighted_data = var.data * full_cosines

                plot_values = np.zeros(weighted_data.shape[0])
                plot_times = []
                for y in range(weighted_data.shape[0]):
                    plot_values[y] = np.ma.sum(weighted_data[y])
                    plot_times += [dt.datetime(settings.START_YEAR + (y / 12), 1 + (y % 12), 1, 0, 0)]

                # plot the monthly data
                plt.clf()
                plt.plot(plot_times, plot_values, "r-", label="Monthly")

                var.mdata = plot_values
                monthly_times = plot_times

                # and annual
                plot_values = plot_values.reshape(-1, 12)

                if var.name != "n_obs":
                    plot_values = np.mean(plot_values, axis=1)
                    plot_times = [dt.datetime(settings.START_YEAR + y, 7, 1) for y in range(plot_values.shape[0])]

                    plt.plot(plot_times, plot_values, "b-", label="Annual")
                    plt.ylabel(var.units)

                else:
                    # if n_obs, then have second x-axis
                    plot_values = np.sum(plot_values, axis=1)
                    plot_times = [dt.datetime(settings.START_YEAR + y, 7, 1) for y in range(plot_values.shape[0])]

                    # finish off first axis
                    ax1 = plt.gca()
                    ax1.set_ylabel("Monthly", color="r")
                    for tl in ax1.get_yticklabels():
                        tl.set_color("r")

                    # add second axis
                    ax2 = ax1.twinx()
                    ax2.plot(plot_times, plot_values, "b-", label="Annual")
                    ax2.set_ylabel("Annual", color="b")
                    for tl in ax2.get_yticklabels():
                        tl.set_color("b")

                var.adata = plot_values
                annual_times = plot_times

                # and prettify the plot
                plt.title(" ".join([x.capitalize() for x in var.name.split("_")]))
                if var.name != "n_obs":
                    plt.legend()
                plt.figtext(0.01, 0.01, watermarkstring, size=6)

                plt.savefig(
                    "{}/{}_5x5_monthly{}_from_daily_{}_{}_ts.png".format(
                        settings.PLOT_LOCATION, settings.OUTROOT, version, period, var.name
                    )
                )

            # clean up
            ncdf_file.close()
            del weighted_data
            del full_cosines
            gc.collect()

            # write output files (annual and monthly)
            filename = "{}/{}_5x5_monthly{}_from_daily_{}_{}_ts_annual.nc".format(
                settings.DATA_LOCATION, settings.OUTROOT, version, period, suffix
            )
            if os.path.exists(filename):
                os.remove(filename)
            write_ncdf_ts(annual_times, OBS_ORDER, filename, annual=True, do_zip=True)

            filename = "{}/{}_5x5_monthly{}_from_daily_{}_{}_ts_monthly.nc".format(
                settings.DATA_LOCATION, settings.OUTROOT, version, period, suffix
            )
            if os.path.exists(filename):
                os.remove(filename)
            write_ncdf_ts(monthly_times, OBS_ORDER, filename, monthly=True, do_zip=True)

            # clean up
            del plot_values
            del plot_times
            del OBS_ORDER
            gc.collect()

    # not activated at present
    pentads = False
    if pentads:
        # pentad -> annual
        OBS_ORDER = utils.make_MetVars(settings.mdi, multiplier=False)

        for v, var in enumerate(OBS_ORDER):
            print var.name

            filename = "{}/{}_1x1_pentads_from_3hrly_{}_{}_{}.nc".format(
                settings.DATA_LOCATION, settings.OUTROOT, var.name, period, suffix
            )
            ncdf_file = ncdf.Dataset(filename, "r", format="NETCDF4")

            lat_centres = ncdf_file.variables["latitude"]
            lon_centres = ncdf_file.variables["longitude"]

            data_shape = ncdf_file.variables[var.name][:].shape

            # pentads
            mesh_lon, mesh_lat = np.meshgrid(lon_centres, lat_centres)
            cosines = np.cos(np.radians(mesh_lat))

            plot_values = np.zeros(data_shape[0])
            plot_times = []
            year = copy.deepcopy(settings.START_YEAR)

            for ts in range(data_shape[0]):
                data = ncdf_file.variables[var.name][ts]

                full_cosines = np.ma.array(cosines)
                full_cosines.mask = data.mask
                full_cosines = full_cosines / np.sum(full_cosines)

                weighted_data = data * full_cosines

                plot_values[ts] = np.ma.sum(weighted_data)

                if calendar.isleap(year) and ((ts + 1) * 5) % 365 > 60:
                    # account for 6-day pentad in leap years
                    plot_times += [dt.datetime(year, 1, 1, 0, 0) + dt.timedelta(days=((ts + 1) * 5) % 365 + 1)]
                else:
                    plot_times += [dt.datetime(year, 1, 1, 0, 0) + dt.timedelta(days=((ts + 1) * 5) % 365)]

                print year, ts, plot_times[-1]
                if ((ts + 1) * 5) % 365 == 0:
                    year += 1

            plt.clf()
            plt.plot(plot_times, plot_values, "r-")
            plt.title(var.name)
            plt.ylabel(var.units)

            # annual
            plot_values = plot_values.reshape(-1, 73, data_shape[-2], data_shape[-1])
            plot_values = np.mean(plot_values, axis=1)

            plt.plot(plot_times[36::73], plot_values, "b-")
            plt.savefig("{}/{}_pentads_all.png".format(settings.PLOT_LOCATION, var.name))

            raw_input("check")

    return # make_timeseries
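# Sketch of the area weighting used in make_timeseries(), assuming the helper
# mask_and_normalise_weights() (defined elsewhere in this module) zeroes the
# weights under the data mask and renormalises them to sum to one per field.
# This stand-alone version reproduces that logic with toy data.
def _sketch_area_weighted_mean():
    import numpy as np
    nlat, nlon = 36, 72
    lats = np.linspace(-87.5, 87.5, nlat)
    field = np.ma.masked_invalid(np.random.rand(nlat, nlon))
    field[:5] = np.ma.masked                      # e.g. no obs near one pole
    mesh_lat = np.meshgrid(np.arange(nlon), lats)[1]
    weights = np.ma.array(np.cos(np.radians(mesh_lat)), mask = field.mask)
    weights = weights / np.ma.sum(weights)        # masked weights sum to one
    return np.ma.sum(field * weights)             # area-weighted average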
def do_conversion(start_year=defaults.START_YEAR, end_year=defaults.END_YEAR, period="all", doBC=False, doQC=True):
    """
    Convert dailies to pentads 1x1

    :param int start_year: start year to process
    :param int end_year: end year to process
    :param str period: which period to do day/night/all?
    :param bool doBC: work on the bias corrected data
    :param bool doQC: incorporate the QC flags or not

    :returns:
    """
    settings = set_paths_and_vars.set(doBC=doBC, doQC=doQC)

    OBS_ORDER = utils.make_MetVars(settings.mdi, multiplier=False)

    for year in np.arange(start_year, end_year + 1):

        # set up empty data array
        all_dailies = np.ma.zeros([len(OBS_ORDER), utils.days_in_year(year), len(grid_lats), len(grid_lons)])
        all_dailies.mask = np.zeros([len(OBS_ORDER), utils.days_in_year(year), len(grid_lats), len(grid_lons)])
        all_dailies.fill_value = settings.mdi

        all_n_obs = np.zeros([utils.days_in_year(year), len(grid_lats), len(grid_lons)])

        year_start = dt.datetime(year, 1, 1, 0, 0)

        for month in np.arange(12) + 1:
            print year, month

            month_start = utils.day_of_year(year, month)
            month_end = month_start + calendar.monthrange(year, month)[1]

            filename = "{}/{}_1x1_daily_{}{:02d}_{}.nc".format(
                settings.DATA_LOCATION, settings.OUTROOT, year, month, period
            )
            ncdf_file = ncdf.Dataset(filename, "r", format="NETCDF4")

            for v, var in enumerate(OBS_ORDER):
                if month == 12:
                    # run to end of year if december
                    all_dailies[v, month_start:, :, :] = ncdf_file.variables[var.name][:]
                else:
                    all_dailies[v, month_start:month_end, :, :] = ncdf_file.variables[var.name][:]

            # now get number of observations
            if month == 12:
                all_n_obs[month_start:, :, :] = ncdf_file.variables["n_obs"][:]
            else:
                all_n_obs[month_start:month_end, :, :] = ncdf_file.variables["n_obs"][:]

        if calendar.isleap(year):
            assert all_dailies.shape[1] == 366

            # extract 6-day pentad
            incl_feb29th = all_dailies[:, 55:61, :, :]

            # remove the data of Feb 29th from array
            # np.ma.delete doesn't exist, so have to copy mask separately
            mask = all_dailies.mask
            all_dailies = np.delete(all_dailies, 59, 1)
            mask = np.delete(mask, 59, 1)
            all_dailies = np.ma.array(all_dailies, mask=mask)
            del mask

            # number of observations
            incl_feb29th_n_obs = all_n_obs[55:61, :, :]
            all_n_obs = np.delete(all_n_obs, 59, 0)
        else:
            assert all_dailies.shape[1] == 365

        shape = all_dailies.shape
        all_dailies = all_dailies.reshape(shape[0], -1, 5, shape[-2], shape[-1])

        n_days_per_pentad = np.ma.count(all_dailies, axis=2)

        if settings.doMedian:
            pentad_grid = utils.bn_median(all_dailies, axis=2)
        else:
            pentad_grid = np.ma.mean(all_dailies, axis=2)

        # clear up memory
        del all_dailies
        gc.collect()

        all_n_obs = all_n_obs.reshape(-1, 5, shape[-2], shape[-1])
        all_n_obs = np.sum(all_n_obs, axis=1)

        # mask where fewer than 2 days have values
        # KW THIS IS ACTUALLY 2 - WHICH I THINK IS GOOD
        pentad_grid.mask[n_days_per_pentad < N_OBS] = True

        # the pentad containing Feb 29th is the 12th in the year (index 11)
        if calendar.isleap(year):
            # overwrite this with the me(di)an of a 6-day pentad
            if settings.doMedian:
                pentad_grid[:, 11, :, :] = utils.bn_median(incl_feb29th, axis=1)
            else:
                pentad_grid[:, 11, :, :] = np.ma.mean(incl_feb29th, axis=1)

            feb_n_days_per_pentad = np.ma.count(incl_feb29th, axis=1)
            pentad_grid.mask[:, 11, :, :][feb_n_days_per_pentad < N_OBS] = True
            n_days_per_pentad[:, 11, :, :] = feb_n_days_per_pentad

            all_n_obs[11, :, :] = np.sum(incl_feb29th_n_obs, axis=0)

            print "processed Feb 29th"

        times = utils.TimeVar("time", "time since 1/1/{} in hours".format(year), "hours", "time")
        times.data = np.arange(0, pentad_grid.shape[1]) * 5 * 24

        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_pentad_{}_{}.nc".format(year, period)
        utils.netcdf_write(
            out_filename,
            pentad_grid,
            n_days_per_pentad[0],
            all_n_obs,
            OBS_ORDER,
            grid_lats,
            grid_lons,
            times,
            frequency="P",
        )

        del pentad_grid
        del all_n_obs
        del n_days_per_pentad
        gc.collect()

    return # do_conversion
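# Toy illustration (not part of the pipeline) of the pentad logic in
# do_conversion(): a 365-day year reshapes cleanly to (73, 5); in leap years
# day index 59 (Feb 29th) is cut out first and folded back into pentad index
# 11 as a six-day average, as above.
def _sketch_pentads():
    import numpy as np
    daily = np.ma.arange(366, dtype = float)       # a leap year of daily values
    six_day = daily[55:61]                         # pentad containing Feb 29th
    daily = np.ma.array(np.delete(daily, 59))      # drop Feb 29th -> 365 days
    pentads = np.ma.mean(daily.reshape(73, 5), axis = 1)
    pentads[11] = np.ma.mean(six_day)              # 6-day mean for pentad 12
    return pentads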
# KATE modified
def combine_files(suffix = "relax", pentads = False, do3hr = False, months = False, daily = False, start_year = defaults.START_YEAR, end_year = defaults.END_YEAR, start_month = 1, end_month = 12, period = "both", doQC = False, doQC1it = False, doQC2it = False, doQC3it = False, doBC = False, doBCtotal = False, doBChgt = False, doBCscn = False):
#def combine_files(suffix = "relax", pentads = False, do3hr = False, months = False, daily = False, start_year = defaults.START_YEAR, end_year = defaults.END_YEAR, start_month = 1, end_month = 12, period = "both", doQC = False, doBC = False):
# end
    '''
    Combine the files, first the pentads 1x1, then the monthlies 5x5

    :param str suffix: "relax" or "strict" criteria
    :param bool pentads: run on pentads
    :param bool do3hr: run on pentads created from 3hrly data (if False then run on those from daily)
    :param bool months: run on 5x5 monthly data
    :param bool daily: run on monthlies created direct from dailies (if False then run on those from 1x1 monthlies)
    :param int start_year: start year to process
    :param int end_year: end year to process
    :param int start_month: start month to process
    :param int end_month: end month to process
    :param str period: which period to do day/night/both?
    :param bool doQC: incorporate the QC flags or not
# KATE modified
    :param bool doQC1it: incorporate the 1st iteration QC flags or not
    :param bool doQC2it: incorporate the 2nd iteration QC flags or not
    :param bool doQC3it: incorporate the 3rd iteration QC flags or not
# end
    :param bool doBC: work on the bias corrected data
# KATE modified
    :param bool doBCtotal: work on the full bias corrected data
    :param bool doBChgt: work on the height only bias corrected data
    :param bool doBCscn: work on the screen only bias corrected data
# end

    :returns:
    '''
# KATE modified
    settings = set_paths_and_vars.set(doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn, doQC = doQC, doQC1it = doQC1it, doQC2it = doQC2it, doQC3it = doQC3it)
    #settings = set_paths_and_vars.set(doBC = doBC, doQC = doQC)
# end

    # pentads
    if pentads:
        OBS_ORDER = utils.make_MetVars(settings.mdi, multiplier = False)

        # KW make OBS_ORDER only the actual variables - remove anomalies
        NEWOBS_ORDER = []
        for v, var in enumerate(OBS_ORDER):
            if "anomalies" not in var.name:
                NEWOBS_ORDER.append(var)
        del OBS_ORDER
        OBS_ORDER = np.copy(NEWOBS_ORDER)
        del NEWOBS_ORDER

        # set up the grids
        DELTA = 1
        grid_lats = np.arange(-90 + DELTA, 90 + DELTA, DELTA)
        grid_lons = np.arange(-180 + DELTA, 180 + DELTA, DELTA)

        Nyears = end_year - start_year + 1

        # read in each variable - memory issues
        for v, var in enumerate(OBS_ORDER):
            print var.name

            all_pentads = np.ma.zeros((1, Nyears, 73, len(grid_lats), len(grid_lons)))
            all_pentads.mask = np.ones((1, Nyears, 73, len(grid_lats), len(grid_lons)))
            all_pentads.fill_value = settings.mdi

            n_obs = np.zeros((Nyears, 73, len(grid_lats), len(grid_lons)))
            n_grids = np.zeros((Nyears, 73, len(grid_lats), len(grid_lons)))

            for y, year in enumerate(np.arange(start_year, end_year + 1)):

                if do3hr:
                    filename = settings.DATA_LOCATION + "{}_1x1_pentad_from_3hrly_{}_{}_{}.nc".format(settings.OUTROOT, year, period, suffix)
                else:
                    filename = settings.DATA_LOCATION + "{}_1x1_pentad_{}_{}_{}.nc".format(settings.OUTROOT, year, period, suffix)

                ncdf_file = ncdf.Dataset(filename, 'r', format = 'NETCDF4')

                time = ncdf_file.variables["time"]

                try:
                    assert time.long_name == "time since 1/1/{} in hours".format(year)
                except AssertionError:
                    print "time units are not as expected."
                    print "  expected time since 1/1/{} in hours".format(year)
                    print "  got {}".format(time.long_name)
                    sys.exit()

                all_pentads[0, y, :, :, :] = ncdf_file.variables[var.name][:]

                n_obs[y, :, :, :] = ncdf_file.variables["n_obs"][:]
                n_grids[y, :, :, :] = ncdf_file.variables["n_obs"][:]

                print year

                if y == 0 and period == "both":
                    lat_centres = ncdf_file.variables["latitude"]
# KATE modified - this results in lats that go from 92.5 to -82.5 or 90.5 to -88.5 so I've switched the + for a -
                    latitudes = lat_centres - (lat_centres[1] - lat_centres[0]) / 2.
                    #latitudes = lat_centres + (lat_centres[1] - lat_centres[0]) / 2.
# end
                    lon_centres = ncdf_file.variables["longitude"]
                    longitudes = lon_centres + (lon_centres[1] - lon_centres[0]) / 2.

                ncdf_file.close()

            all_pentads = all_pentads.reshape(1, -1, len(grid_lats), len(grid_lons))

            # sort the times
            times = utils.TimeVar("time", "time since 1/1/1973 in months", "months", "time")
            times.data = np.arange(all_pentads.shape[1])

            # and write file
            if do3hr:
                out_filename = settings.DATA_LOCATION + "{}_1x1_pentads_from_3hrly_{}_{}_{}.nc".format(settings.OUTROOT, var.name, period, suffix)
            else:
                out_filename = settings.DATA_LOCATION + "{}_1x1_pentads_{}_{}_{}.nc".format(settings.OUTROOT, var.name, period, suffix)

            if period == "both":
                utils.netcdf_write(out_filename, all_pentads, n_grids, n_obs, OBS_ORDER, latitudes, longitudes, times, frequency = "P", single = var)
            else:
                utils.netcdf_write(out_filename, all_pentads, n_grids, n_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "P", single = var)

        # Reset the data holding arrays and objects
        del OBS_ORDER
        gc.collect()

    if months:
        OBS_ORDER = utils.make_MetVars(settings.mdi, multiplier = False)

        #*****************************
        # monthlies
        for y, year in enumerate(np.arange(start_year, end_year + 1)):
            print year

            for month in np.arange(start_month, end_month + 1):
                print " {}".format(month)

                if daily:
                    filename = settings.DATA_LOCATION + "{}_5x5_monthly_from_daily_{}{:02d}_{}_{}.nc".format(settings.OUTROOT, year, month, period, suffix)
                else:
                    filename = settings.DATA_LOCATION + "{}_5x5_monthly_{}{:02d}_{}_{}.nc".format(settings.OUTROOT, year, month, period, suffix)

                ncdf_file = ncdf.Dataset(filename, 'r', format = 'NETCDF4')

                time = ncdf_file.variables["time"]

                try:
                    assert time.long_name == "time since 1/{}/{} in hours".format(month, year)
                except AssertionError:
                    print "time units are not as expected."
                    print "  expected time since 1/{}/{} in hours".format(month, year)
                    print "  got {}".format(time.long_name)
                    sys.exit()

                for v, var in enumerate(OBS_ORDER):

                    nc_var = ncdf_file.variables[var.name]

                    try:
                        var.data = utils.ma_append(var.data, nc_var[:], axis = 0)
                        if v == 0:
                            n_obs = utils.ma_append(n_obs, ncdf_file.variables["n_obs"][:], axis = 0)
                            n_grids = utils.ma_append(n_grids, ncdf_file.variables["n_grids"][:], axis = 0)
                    except AttributeError:
                        var.data = nc_var[:]
                        var.data.fill_value = nc_var.missing_value
                        if v == 0:
                            n_obs = ncdf_file.variables["n_obs"][:]
                            n_grids = ncdf_file.variables["n_grids"][:]

                if y == 0 and month == start_month and period == "both":
                    lat_centres = ncdf_file.variables["latitude"]
                    latitudes = lat_centres + (lat_centres[1] - lat_centres[0]) / 2.
                    lon_centres = ncdf_file.variables["longitude"]
                    longitudes = lon_centres + (lon_centres[1] - lon_centres[0]) / 2.
# KATE modified - added an extra loop so that we can flip the latitudes for day and night too
                if y == 0 and month == start_month and period != "both":
                    lat_centres = ncdf_file.variables["latitude"]
                    # THIS IS - RATHER THAN + READY TO FLIP THE LATS
                    latitudes = lat_centres - (lat_centres[1] - lat_centres[0]) / 2.
                    lon_centres = ncdf_file.variables["longitude"]
                    longitudes = lon_centres + (lon_centres[1] - lon_centres[0]) / 2.
# end

                ncdf_file.close()

        # write out into big array for netCDF file
        all_data = np.ma.zeros((len(OBS_ORDER), var.data.shape[0], var.data.shape[1], var.data.shape[2]))
        all_data.mask = np.zeros((len(OBS_ORDER), var.data.shape[0], var.data.shape[1], var.data.shape[2]))

        for v, var in enumerate(OBS_ORDER):
            all_data[v, :, :, :] = var.data

# KATE modified - switching the latitudes on day and night data for consistency with both
        if period == "day" or period == "night":
            # invert latitudes
            latitudes = latitudes[::-1]
            all_data = all_data[:, :, ::-1, :] # variable, time, latitude, longitude
# end
        all_data.fill_value = var.data.fill_value

        # extra stuff for writing
# KATE modified - no longer need grid5 as we're using latitudes and longitudes
        #DELTA = 5
        #grid5_lats = np.arange(-90 + DELTA, 90 + DELTA, DELTA)
        #grid5_lons = np.arange(-180 + DELTA, 180 + DELTA, DELTA)
# end
# KATE modified - START_YEAR not defined, should be start_year
        times = utils.TimeVar("time", "time since 1/1/{} in months".format(start_year), "months", "time")
        #times = utils.TimeVar("time", "time since 1/1/{} in months".format(START_YEAR), "months", "time")
# end
        times.data = np.arange(var.data.shape[0])

        # and write file
        if daily:
            out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_from_daily_{}_{}.nc".format(period, suffix)
        else:
            out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_{}_{}.nc".format(period, suffix)

# KATE modified - now always using latitudes and longitudes
        utils.netcdf_write(out_filename, all_data, n_grids, n_obs, OBS_ORDER, latitudes, longitudes, times, frequency = "Y")
        #if period == "both":
        #    utils.netcdf_write(out_filename, all_data, n_grids, n_obs, OBS_ORDER, latitudes, longitudes, times, frequency = "Y")
        #else:
        #    utils.netcdf_write(out_filename, all_data, n_grids, n_obs, OBS_ORDER, grid5_lats, grid5_lons, times, frequency = "Y")
# end

    return # combine_files
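# Sketch (toy data, illustrative only) of the latitude flip applied to the
# day/night monthlies above: both the coordinate vector and the latitude axis
# of the data must be reversed together so values stay attached to the right
# latitudes.
def _sketch_flip_lats():
    import numpy as np
    lats = np.arange(-87.5, 90, 5.)                # ascending, -87.5 ... 87.5
    data = np.random.rand(2, 12, len(lats), 72)    # var x time x lat x lon
    flipped = data[:, :, ::-1, :]
    lats = lats[::-1]                              # now 87.5 ... -87.5
    assert np.allclose(flipped[0, 0, 0], data[0, 0, -1])
    return lats, flipped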
def calculate_climatology(suffix = "relax", start_year = 1981, end_year = 2010, period = "both", do3hr = False, doQC = False, doBC = False):
    '''
    Make 1x1 pentad climatology

    :param str suffix: "relax" or "strict" criteria
    :param int start_year: start year to process
    :param int end_year: end year to process
    :param str period: which period to do day/night/both?
    :param bool do3hr: run on 3hr --> pentad data
    :param bool doQC: incorporate the QC flags or not
    :param bool doBC: work on the bias corrected data

    :returns:
    '''
    settings = set_paths_and_vars.set(doBC = doBC, doQC = doQC)

    if suffix == "relax":
        N_YEARS_PRESENT = 10 # number of years present to calculate climatology
    elif suffix == "strict":
        N_YEARS_PRESENT = 15 # number of years present to calculate climatology

    print "Do 3hrly: {}".format(do3hr)

    N_YEARS = end_year - start_year + 1

    # read in each variable - memory issues
    all_clims = np.ma.zeros([len(OBS_ORDER), 73, len(grid_lats), len(grid_lons)])
    # KW - why set up as np.ones?
    all_clims.mask = np.zeros([len(OBS_ORDER), 73, len(grid_lats), len(grid_lons)])

    all_stds = np.ma.zeros([len(OBS_ORDER), 73, len(grid_lats), len(grid_lons)])
    all_stds.mask = np.zeros([len(OBS_ORDER), 73, len(grid_lats), len(grid_lons)])

    # KW no mask??? I've set one with fill_value as -1 - should the mask be .zeros or .ones though?
    all_n_obs = np.ma.zeros([N_YEARS, 73, len(grid_lats), len(grid_lons)])
    all_n_obs.mask = np.zeros([N_YEARS, 73, len(grid_lats), len(grid_lons)])
    all_n_obs.fill_value = -1

    for v, var in enumerate(OBS_ORDER):
        print var.name

        # number of pentads = 365/5 = 73
        # set up empty data array
        all_pentads = np.ma.zeros([N_YEARS, 73, len(grid_lats), len(grid_lons)])
        # sets up a mask of 'False' = not masked!
        all_pentads.mask = np.zeros([N_YEARS, 73, len(grid_lats), len(grid_lons)])
        all_pentads.fill_value = settings.mdi

        # read in relevant years
        for y, year in enumerate(np.arange(start_year, end_year + 1)):
            print year

            if do3hr:
                filename = settings.DATA_LOCATION + "{}_1x1_pentad_from_3hrly_{}_{}_{}.nc".format(settings.OUTROOT, year, period, suffix)
            else:
                filename = settings.DATA_LOCATION + "{}_1x1_pentad_{}_{}_{}.nc".format(settings.OUTROOT, year, period, suffix)

            ncdf_file = ncdf.Dataset(filename, 'r', format = 'NETCDF4')

            all_pentads[y, :, :, :] = ncdf_file.variables[var.name][:]

            if v == 0:
                all_n_obs[y, :, :, :] = ncdf_file.variables["n_obs"][:]

        # years x pentads x lats x lons
        n_grids = np.ma.count(all_pentads, axis = 0)

        # collapse down the years
        if settings.doMedian:
            all_clims[v, :, :, :] = utils.bn_median(all_pentads, axis = 0)
        else:
            all_clims[v, :, :, :] = np.ma.mean(all_pentads, axis = 0)

        all_stds[v, :, :, :] = np.ma.std(all_pentads, axis = 0)

        # mask where fewer than 50% of years have data
        locs = np.ma.where(n_grids < N_YEARS_PRESENT)
        all_clims[v, :, :, :].mask[locs] = True
        # KW should probably mask stdev too - although unmasked it does show the potential coverage
        all_stds[v, :, :, :].mask[locs] = True

        if settings.plots and v == 0:
            import matplotlib.pyplot as plt
            plt.clf()
            plt.hist(n_grids.reshape(-1), bins = np.arange(-1, 32), align = "left", log = True, rwidth = 0.5)
            plt.axvline(x = N_YEARS_PRESENT - 0.5, color = "r")
            plt.title("Number of years present in each pentad")
            plt.xlabel("Number of years (max = 30)")
            plt.ylabel("Frequency (log scale)")
            plt.savefig(settings.PLOT_LOCATION + "pentad_clims_n_years_{}_{}_{}.png".format(year, period, suffix))

    # now process number of observations
    # (KW all_n_obs wasn't a masked array - so have set it up as one - BUT not really convinced this
    # is working as it should.  No import numpy.ma?)
    all_obs = np.ma.sum(all_n_obs, axis = 0)

    # set up time array
    times = utils.TimeVar("time", "time since 1/1/{} in days".format(1), "days", "time")
    times.data = np.arange(0, 73) * 5

    # write files
    if do3hr:
        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_pentad_climatology_from_3hrly_{}_{}.nc".format(period, suffix)
    else:
        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_pentad_climatology_{}_{}.nc".format(period, suffix)

    utils.netcdf_write(out_filename, all_clims, n_grids, all_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "P")

    if do3hr:
        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_pentad_stdev_from_3hrly_{}_{}.nc".format(period, suffix)
    else:
        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_pentad_stdev_{}_{}.nc".format(period, suffix)

    utils.netcdf_write(out_filename, all_stds, n_grids, all_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "P")

    # test distribution of obs with grid boxes
    if do3hr:
        outfile = file(settings.OUTROOT + "_1x1_pentad_climatology_from_3hrly_{}_{}.txt".format(period, suffix), "w")
    else:
        outfile = file(settings.OUTROOT + "_1x1_pentad_climatology_{}_{}.txt".format(period, suffix), "w")

    utils.boxes_with_n_obs(outfile, all_obs, all_clims[0], N_YEARS_PRESENT)

    return # calculate_climatology
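# Sketch of the me(di)an switch used above, with np.ma.median standing in for
# utils.bn_median (the repo's faster bottleneck-based median; the assumption
# here is that it takes the same (array, axis) arguments).
def _sketch_collapse_years(do_median = True):
    import numpy as np
    all_pentads = np.ma.masked_invalid(np.random.rand(30, 73, 4, 8))
    if do_median:
        return np.ma.median(all_pentads, axis = 0)
    return np.ma.mean(all_pentads, axis = 0)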
# KATE modified
def set_up_merge(suffix = "relax", clims = False, months = False, pentads = False, start_year = defaults.START_YEAR, end_year = defaults.END_YEAR, start_month = 1, end_month = 12, doQC = False, doQC1it = False, doQC2it = False, doQC3it = False, doBC = False, doBCtotal = False, doBChgt = False, doBCscn = False):
#def set_up_merge(suffix = "relax", clims = False, months = False, pentads = False, start_year = defaults.START_YEAR, end_year = defaults.END_YEAR, start_month = 1, end_month = 12, doQC = False, doBC = False):
# end
    '''
    Obtain file roots and set processes running

    :param str suffix: "relax" or "strict" criteria
    :param bool clims: run the climatologies
    :param bool months: run the monthlies
    :param bool pentads: run the annual pentads
    :param int start_year: start year to process
    :param int end_year: end year to process
    :param int start_month: start month to process
    :param int end_month: end month to process
    :param bool doQC: incorporate the QC flags or not
# KATE modified
    :param bool doQC1it: incorporate the 1st iteration QC flags or not
    :param bool doQC2it: incorporate the 2nd iteration QC flags or not
    :param bool doQC3it: incorporate the 3rd iteration QC flags or not
# end
    :param bool doBC: work on the bias corrected data
# KATE modified NOTE THAT I HAVE OVERWRITTEN settings.doMedian to force MEAN instead
# end
    '''
# KATE modified
    settings = set_paths_and_vars.set(doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn, doQC = doQC, doQC1it = doQC1it, doQC2it = doQC2it, doQC3it = doQC3it)
    #settings = set_paths_and_vars.set(doBC = doBC, doQC = doQC)
# end

    if clims:
        print "Processing Climatologies"

#        fileroot = get_fileroot(settings, climatology = True)
#        do_merge(fileroot, settings.mdi, suffix, doMedian = settings.doMedian)

        fileroot = get_fileroot(settings, climatology = True, do3hr = True)
# KATE MEDIAN WATCH
# KATE modified - forcing MEAN
        do_merge(fileroot, settings.mdi, suffix, clims = True, doMedian = False)
        #do_merge(fileroot, settings.mdi, suffix, clims = True, doMedian = settings.doMedian)
# end

        # and stdev
        print "Processing Standard Deviations"
        fileroot = get_fileroot(settings, climatology = True, do3hr = True, stdev = True)
# KATE MEDIAN WATCH
# KATE modified - forcing MEAN
        do_merge(fileroot, settings.mdi, suffix, clims = True, doMedian = False)
        #do_merge(fileroot, settings.mdi, suffix, clims = True, doMedian = settings.doMedian)
# end

    if pentads:
        print "Processing Pentads"

#        fileroot = get_fileroot(settings, pentads = True)
#        do_merge(fileroot, settings.mdi, suffix, doMedian = settings.doMedian)

        for year in np.arange(start_year, end_year + 1):
            print year
            fileroot = get_fileroot(settings, pentads = True, do3hr = True, time = [year])
# KATE MEDIAN WATCH
# KATE modified - forcing MEAN
            do_merge(fileroot, settings.mdi, suffix, doMedian = False)
            #do_merge(fileroot, settings.mdi, suffix, doMedian = settings.doMedian)
# end

    if months:
        print "Processing Monthly Files"
# KATE modified - START_YEAR not defined - commented these out as they are all set in the call to function
        #start_year = START_YEAR
        #end_year = END_YEAR
        #start_month = 1
        #end_month = 12
# end
        for year in np.arange(start_year, end_year + 1):
            print year
            for month in np.arange(start_month, end_month + 1):
                print " {}".format(month)
#                fileroot = get_fileroot(settings, months = True, time = [year, month])
#                do_merge(fileroot, settings.mdi, suffix, doMedian = settings.doMedian)
                fileroot = get_fileroot(settings, months = True, time = [year, month], daily = True)
# KATE MEDIAN WATCH
# KATE modified - forcing MEAN
                do_merge(fileroot, settings.mdi, suffix, doMedian = False)
                #do_merge(fileroot, settings.mdi, suffix, doMedian = settings.doMedian)
# end

    return # set_up_merge
# KATE modified
def do_gridding(suffix = "relax", start_year = defaults.START_YEAR, end_year = defaults.END_YEAR, start_month = 1, end_month = 12, doQC = False, doQC1it = False, doQC2it = False, doQC3it = False, doSST_SLP = False, doBC = False, doBCtotal = False, doBChgt = False, doBCscn = False, doUncert = False):
#def do_gridding(suffix = "relax", start_year = defaults.START_YEAR, end_year = defaults.END_YEAR, start_month = 1, end_month = 12, doQC = False, doSST_SLP = False, doBC = False, doUncert = False):
# end
    '''
    Do the gridding, first to 3hrly 1x1, then to daily 1x1 and finally monthly 1x1 and 5x5

    :param str suffix: "relax" or "strict" criteria
    :param int start_year: start year to process
    :param int end_year: end year to process
    :param int start_month: start month to process
    :param int end_month: end month to process
    :param bool doQC: incorporate the QC flags or not
    :param bool doQC1it: incorporate the first iteration (no buddy) QC flags or not
    :param bool doQC2it: incorporate the second iteration (no buddy) QC flags or not
    :param bool doQC3it: incorporate the third iteration (buddy) QC flags or not
    :param bool doSST_SLP: process additional variables or not
    :param bool doBC: work on the bias corrected data
    :param bool doBCtotal: work on the full bias corrected data
    :param bool doBChgt: work on the height only bias corrected data
    :param bool doBCscn: work on the screen only bias corrected data
    :param bool doUncert: work on files with uncertainty information (not currently used)

    :returns:
    '''
# KATE modified
    settings = set_paths_and_vars.set(doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn, doQC = doQC, doQC1it = doQC1it, doQC2it = doQC2it, doQC3it = doQC3it)
    #settings = set_paths_and_vars.set(doBC = doBC, doQC = doQC)
# end

# KATE modified - added other BC options
#    if doBC:
    if doBC | doBCtotal | doBChgt | doBCscn:
# end
        fields = mds.TheDelimitersExt # extended (BC)
    else:
        fields = mds.TheDelimitersStd # Standard

# KATE modified - added other BC options
#    OBS_ORDER = utils.make_MetVars(settings.mdi, doSST_SLP = doSST_SLP, multiplier = True, doBC = doBC) # ensure that convert from raw format at writing stage with multiplier
    OBS_ORDER = utils.make_MetVars(settings.mdi, doSST_SLP = doSST_SLP, multiplier = True, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn) # ensure that convert from raw format at writing stage with multiplier
# end

    # KW switching between 4 ('_strict') for climatology build and 2 for anomaly build ('_relax') - added subscripts to files
    if suffix == "relax":
        N_OBS_DAY = 2 # KW ok for anomalies but this was meant to be 4 for dailies_all? and 2 for dailies_night/day?
        N_OBS_FRAC_MONTH = 0.3

    elif suffix == "strict":
        N_OBS_DAY = 4
        N_OBS_FRAC_MONTH = 0.3

    # flags to check on and values to allow through
# KATE modified
    if doQC1it | doQC2it:
        these_flags = {"ATclim":0, "ATrep":0, "DPTclim":0, "DPTssat":0, "DPTrep":0, "DPTrepsat":0}
    else:
        these_flags = {"ATbud":0, "ATclim":0, "ATrep":0, "DPTbud":0, "DPTclim":0, "DPTssat":0, "DPTrep":0, "DPTrepsat":0}
    #these_flags = {"ATbud":0, "ATclim":0, "ATrep":0, "DPTbud":0, "DPTclim":0, "DPTssat":0, "DPTrep":0, "DPTrepsat":0}
# end

    # spin through years and months to read files
    for year in np.arange(start_year, end_year + 1):

        for month in np.arange(start_month, end_month + 1):

            times = utils.TimeVar("time", "time since 1/{}/{} in hours".format(month, year), "hours", "time")

            grid_hours = np.arange(0, 24 * calendar.monthrange(year, month)[1], DELTA_HOUR)

            times.data = grid_hours

            # process the monthly file
# KATE modified - added other BC options
#            if doBC:
            if doBC | doBCtotal | doBChgt | doBCscn:
# end
                filename = "new_suite_{}{:02d}_{}_extended.txt".format(year, month, settings.OUTROOT)
            else:
                filename = "new_suite_{}{:02d}_{}.txt".format(year, month, settings.OUTROOT)

# KATE modified - added other BC options
#            raw_platform_data, raw_obs, raw_meta, raw_qc = utils.read_qc_data(filename, settings.ICOADS_LOCATION, fields, doBC = doBC)
            raw_platform_data, raw_obs, raw_meta, raw_qc = utils.read_qc_data(filename, settings.ICOADS_LOCATION, fields, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
# end

            # extract observation details
            lats, lons, years, months, days, hours = utils.process_platform_obs(raw_platform_data)

            # test dates *KW - SHOULDN'T NEED THIS - ONLY OBS PASSING DATE CHECK ARE INCLUDED*
            # *RD* - hasn't run yet but will leave it in just in case of future use.
            if not utils.check_date(years, year, "years", filename):
                sys.exit(1)
            if not utils.check_date(months, month, "months", filename):
                sys.exit(1)

# KATE modified - seems to be an error with missing global name plots so have changed to settings.plots
            # Choose this one to only output once per decade
            #if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
            # Choose this one to output a plot for each month
            if settings.plots:
            #if plots and (year in [1973, 1983, 1993, 2003, 2013]):
# end
                # plot the distribution of hours
                import matplotlib.pyplot as plt

                plt.clf()
                plt.hist(hours, np.arange(-100, 2500, 100))
                plt.ylabel("Number of observations")
                plt.xlabel("Hours")
                plt.xticks(np.arange(-300, 2700, 300))
                plt.savefig(settings.PLOT_LOCATION + "obs_distribution_{}{:02d}_{}.png".format(year, month, suffix))

                # only for a few of the variables
                for variable in OBS_ORDER:
                    if variable.name in ["marine_air_temperature", "dew_point_temperature", "specific_humidity", "relative_humidity", "marine_air_temperature_anomalies", "dew_point_temperature_anomalies", "specific_humidity_anomalies", "relative_humidity_anomalies"]:
                        #plot_qc_diagnostics.values_vs_lat(variable, lats, raw_obs[:, variable.column], raw_qc, these_flags, settings.PLOT_LOCATION + "qc_actuals_{}_{}{:02d}_{}.png".format(variable.name, year, month, suffix), multiplier = variable.multiplier, doBC = doBC)
                        plot_qc_diagnostics.values_vs_lat_dist(variable, lats, raw_obs[:, variable.column], raw_qc, these_flags, \
                                settings.PLOT_LOCATION + "qc_actuals_{}_{}{:02d}_{}.png".format(variable.name, year, month, suffix), multiplier = variable.multiplier, \
# KATE modified - added other BC options
                                doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
# end

            # QC sub-selection
# KATE modified - added QC iterations but also think this needs to include the bias corrected versions
# because the QC flags need to be applied to those too.
# Not sure what was happening previously with the doBC run - any masking to QC'd obs?
            if doQC | doQC1it | doQC2it | doQC3it | doBC | doBCtotal | doBChgt | doBCscn:
            #if doQC:
# end
                print "Using {} as flags".format(these_flags)
# KATE modified - BC options
#                mask = utils.process_qc_flags(raw_qc, these_flags, doBC = doBC)
                mask = utils.process_qc_flags(raw_qc, these_flags, doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
# end
                print "All Obs: ", len(mask)
                print "Good Obs: ", len(mask[np.where(mask == 0)])
                print "Bad Obs: ", len(mask[np.where(mask == 1)])
                #pdb.set_trace()

                complete_mask = np.zeros(raw_obs.shape)
                for i in range(raw_obs.shape[1]):
                    complete_mask[:, i] = mask
                clean_data = np.ma.masked_array(raw_obs, mask = complete_mask)

            else:
                print "No QC flags selected"
                clean_data = np.ma.masked_array(raw_obs, mask = np.zeros(raw_obs.shape))

            # discretise hours
            hours = utils.make_index(hours, DELTA_HOUR, multiplier = 100)

            # get the hours since start of month
            hours_since = ((days - 1) * 24) + (hours * DELTA_HOUR)

            # discretise lats/lons
            lat_index = utils.make_index(lats, DELTA_LAT, multiplier = 100)
            lon_index = utils.make_index(lons, DELTA_LON, multiplier = 100)

            lat_index += ((len(grid_lats) - 1) / 2) # and as -ve indices are unhelpful, roll by offsetting by most westward
            lon_index += ((len(grid_lons) - 1) / 2) #    or most southerly so that (0,0) is (-90,-180)

            # NOTE - ALWAYS GIVING TOP-RIGHT OF BOX TO GIVE < HARD LIMIT (as opposed to <=)

            # do the gridding
            # extract the full grid, number of obs, and day/night flag
# KATE MEDIAN WATCH This is hard coded to doMedian (rather than settings.doMedian) - OK WITH MEDIAN HERE!!!
# KATE modified - to add settings.doMedian instead of just doMedian which seems to be consistent with the other bits and BC options
            raw_month_grid, raw_month_n_obs, this_month_period = utils.grid_1by1_cam(clean_data, raw_qc, hours_since, lat_index, lon_index, \
                    grid_hours, grid_lats, grid_lons, OBS_ORDER, settings.mdi, doMedian = settings.doMedian, \
                    doBC = doBC, doBCtotal = doBCtotal, doBChgt = doBChgt, doBCscn = doBCscn)
            #raw_month_grid, raw_month_n_obs, this_month_period = utils.grid_1by1_cam(clean_data, raw_qc, hours_since, lat_index, lon_index, grid_hours, grid_lats, grid_lons, OBS_ORDER, settings.mdi, doMedian = True, doBC = doBC)
# end
            print "successfully read data into 1x1 3hrly grids"

            # create matching array size
            this_month_period = np.tile(this_month_period, (len(OBS_ORDER), 1, 1, 1))

            for period in ["all", "day", "night"]:

                if period == "day":
                    this_month_grid = np.ma.masked_where(this_month_period == 1, raw_month_grid)
                    this_month_obs = np.ma.masked_where(this_month_period[0] == 1, raw_month_n_obs) # and take first slice to re-match the array size
                elif period == "night":
                    this_month_grid = np.ma.masked_where(this_month_period == 0, raw_month_grid)
                    this_month_obs = np.ma.masked_where(this_month_period[0] == 0, raw_month_n_obs) # and take first slice to re-match the array size
                else:
                    this_month_grid = copy.deepcopy(raw_month_grid)
                    this_month_obs = copy.deepcopy(raw_month_n_obs)

# KATE modified
                # If SwitchOutput == 1 then we're in test mode - output interim files!!!
                if (SwitchOutput == 1):
                    # have one month of gridded data.
                    out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_3hr_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
                    utils.netcdf_write(out_filename, this_month_grid, np.zeros(this_month_obs.shape), this_month_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "H")
                ## have one month of gridded data.
                #out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_3hr_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
                #utils.netcdf_write(out_filename, this_month_grid, np.zeros(this_month_obs.shape), this_month_obs, OBS_ORDER, grid_lats, grid_lons, times, frequency = "H")
# end

                # now average over time
                # Dailies
                daily_hours = grid_hours.reshape(-1, 24 / DELTA_HOUR)

                shape = this_month_grid.shape
                this_month_grid = this_month_grid.reshape(shape[0], -1, 24 / DELTA_HOUR, shape[2], shape[3])
                this_month_obs = this_month_obs.reshape(-1, 24 / DELTA_HOUR, shape[2], shape[3])

# KATE MEDIAN WATCH - settings.doMedian is generally set to True - I think we may want the MEAN HERE!!!
# KATE modified - to hard wire in MEAN here
                daily_grid = np.ma.mean(this_month_grid, axis = 2)
                #if settings.doMedian:
                #    daily_grid = np.ma.median(this_month_grid, axis = 2)
                #else:
                #    daily_grid = np.ma.mean(this_month_grid, axis = 2)
# end
                daily_grid.fill_value = settings.mdi

                # filter on number of observations/day
                n_hrs_per_day = np.ma.count(this_month_grid, axis = 2)
                n_obs_per_day = np.ma.sum(this_month_obs, axis = 1)

                if period == "all":
                    bad_locs = np.where(n_hrs_per_day < N_OBS_DAY) # at least 2 of possible 8 3-hourly values (6hrly data *KW OR AT LEAST 4 3HRLY OBS PRESENT*)
                else:
                    bad_locs = np.where(n_hrs_per_day < np.floor(N_OBS_DAY / 2.)) # at least 1 of possible 8 3-hourly values (6hrly data *KW OR AT LEAST 4 3HRLY OBS PRESENT*)
                daily_grid.mask[bad_locs] = True

# KATE modified - added SwitchOutput to if loop
                if (SwitchOutput == 1) and settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
                #if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
# end
                    # plot the distribution of hours
                    plt.clf()
                    plt.hist(n_hrs_per_day.reshape(-1), bins = np.arange(-1, 10), align = "left", log = True, rwidth = 0.5)
                    if period == "all":
                        plt.axvline(x = N_OBS_DAY - 0.5, color = "r")
                    else:
                        plt.axvline(x = np.floor(N_OBS_DAY / 2.) - 0.5, color = "r")
                    plt.title("Number of 1x1-3hrly in each 1x1-daily grid box")
                    plt.xlabel("Number of 3-hrly observations (max = 8)")
                    plt.ylabel("Frequency (log scale)")
                    plt.savefig(settings.PLOT_LOCATION + "n_grids_1x1_daily_{}{:02d}_{}_{}.png".format(year, month, period, suffix))

                    plt.clf()
                    plt.hist(n_obs_per_day.reshape(-1), bins = np.arange(-5, 100, 5), log = True, rwidth = 0.5)
                    plt.title("Total number of raw observations in each 1x1 daily grid box")
                    plt.xlabel("Number of raw observations")
                    plt.ylabel("Frequency (log scale)")
                    plt.savefig(settings.PLOT_LOCATION + "n_obs_1x1_daily_{}{:02d}_{}_{}.png".format(year, month, period, suffix))

                # clear up memory
                del this_month_grid
                del this_month_obs
                gc.collect()

# KATE modified
                # If SwitchOutput == 1 then we're in test mode - output interim files!!!
                if (SwitchOutput == 1):
                    # write dailies file
                    times.data = daily_hours[:, 0]
                    out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_daily_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
                    utils.netcdf_write(out_filename, daily_grid, n_hrs_per_day[0], n_obs_per_day, OBS_ORDER, grid_lats, grid_lons, times, frequency = "D")
                #times.data = daily_hours[:,0]
                #out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_daily_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
                #utils.netcdf_write(out_filename, daily_grid, n_hrs_per_day[0], n_obs_per_day, OBS_ORDER, grid_lats, grid_lons, times, frequency = "D")
# end

                # Monthlies
                times.data = daily_hours[0, 0]

# KATE modified - commenting out as we don't need this anymore
#                if settings.doMedian:
#                    monthly_grid = np.ma.median(daily_grid, axis = 1)
#                else:
#                    monthly_grid = np.ma.mean(daily_grid, axis = 1)
#
#                monthly_grid.fill_value = settings.mdi
#
#                # filter on number of observations/month
#                n_grids_per_month = np.ma.count(daily_grid, axis = 1)
#                bad_locs = np.where(n_grids_per_month < calendar.monthrange(year, month)[1] * N_OBS_FRAC_MONTH) # 30% of possible daily values
#                monthly_grid.mask[bad_locs] = True
#
#                # number of raw observations
#                n_obs_per_month = np.ma.sum(n_obs_per_day, axis = 0)
#
#                if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
#                    # plot the distribution of days
#
#                    plt.clf()
#                    plt.hist(n_obs_per_month.reshape(-1), bins = np.arange(-10,500,10), log = True, rwidth=0.5)
#                    plt.title("Total number of raw observations in each 1x1 monthly grid box")
#                    plt.xlabel("Number of raw observations")
#                    plt.ylabel("Frequency (log scale)")
#                    plt.savefig(settings.PLOT_LOCATION + "n_obs_1x1_monthly_{}{:02d}_{}_{}.png".format(year, month, period, suffix))
#
#                    plt.clf()
#                    plt.hist(n_grids_per_month[0].reshape(-1), bins = np.arange(-2,40,2), align = "left", log = True, rwidth=0.5)
#                    plt.axvline(x = calendar.monthrange(year, month)[1] * N_OBS_FRAC_MONTH, color="r")
#                    plt.title("Total number of 1x1 daily grids in each 1x1 monthly grid")
#                    plt.xlabel("Number of 1x1 daily grids")
#                    plt.ylabel("Frequency (log scale)")
#                    plt.savefig(settings.PLOT_LOCATION + "n_grids_1x1_monthly_{}{:02d}_{}_{}.png".format(year, month, period, suffix))
#
#                # write monthly 1x1 file
#                out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_monthly_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
#                utils.netcdf_write(out_filename, monthly_grid, n_grids_per_month[0], n_obs_per_month, OBS_ORDER, grid_lats, grid_lons, times, frequency = "M")
#
#                # now to re-grid to coarser resolution
#                # KW Here we may want to use the mean because its a large area but could be sparsely
#                #    populated with quite different climatologies so we want
#                #    the influence of the outliers (we've done our best to ensure these are good values)
#
#                # go from monthly 1x1 to monthly 5x5 - retained as limited overhead
#                monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, grid5_lats, grid5_lons = utils.grid_5by5(monthly_grid, n_obs_per_month, grid_lats, grid_lons, doMedian = settings.doMedian, daily = False)
#                out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)
#
#                utils.netcdf_write(out_filename, monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, OBS_ORDER, grid5_lats, grid5_lons, times, frequency = "M")
#
#                if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
#                    # plot the distribution of days
#
#                    plt.clf()
#                    plt.hist(monthly_5by5_n_obs.reshape(-1), bins = np.arange(0,100,5), log = True, rwidth=0.5)
#                    plt.title("Total number of raw observations in each 5x5 monthly grid box")
#                    plt.xlabel("Number of raw observations")
#                    plt.ylabel("Frequency (log scale)")
#                    plt.savefig(settings.PLOT_LOCATION + "n_obs_5x5_monthly_{}{:02d}_{}_{}.png".format(year, month, period, suffix))
#
#                    plt.clf()
#                    plt.hist(monthly_5by5_n_grids.reshape(-1), bins = np.arange(-2,30,2), align = "left", log = True, rwidth=0.5)
#                    plt.axvline(x = 1, color="r")
#                    plt.title("Total number of 1x1 monthly grids in each 5x5 monthly grid")
#                    plt.xlabel("Number of 1x1 monthly grids")
#                    plt.ylabel("Frequency (log scale)")
#                    plt.savefig(settings.PLOT_LOCATION + "n_grids_5x5_monthly_{}{:02d}_{}_{}.png".format(year, month, period, suffix))
#
#                # clear up memory
#                del monthly_grid
#                del monthly_5by5
#                del monthly_5by5_n_grids
#                del monthly_5by5_n_obs
#                del n_grids_per_month
#                del n_obs_per_month
#                del n_hrs_per_day
#                gc.collect()
# end

                # go direct from daily 1x1 to monthly 5x5
# KATE MEDIAN WATCH - settings.doMedian is generally set to True - I think we may want the MEAN HERE!!!
# KATE modified - to hard wire in MEAN here
                monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, grid5_lats, grid5_lons = utils.grid_5by5(daily_grid, n_obs_per_day, grid_lats, grid_lons, doMedian = False, daily = True)
                #monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, grid5_lats, grid5_lons = utils.grid_5by5(daily_grid, n_obs_per_day, grid_lats, grid_lons, doMedian = settings.doMedian, daily = True)
# end
                out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_5x5_monthly_from_daily_{}{:02d}_{}_{}.nc".format(year, month, period, suffix)

                utils.netcdf_write(out_filename, monthly_5by5, monthly_5by5_n_grids, monthly_5by5_n_obs, OBS_ORDER, grid5_lats, grid5_lons, times, frequency = "M")

                if settings.plots and (year in [1973, 1983, 1993, 2003, 2013]):
                    # plot the distribution of days

                    plt.clf()
                    plt.hist(monthly_5by5_n_obs.reshape(-1), bins = np.arange(-10, 1000, 10), log = True, rwidth = 0.5)
                    plt.title("Total number of raw observations in each 5x5 monthly grid box")
                    plt.xlabel("Number of raw observations")
                    plt.ylabel("Frequency (log scale)")
                    plt.savefig(settings.PLOT_LOCATION + "n_obs_5x5_monthly_from_daily_{}{:02d}_{}_{}.png".format(year, month, period, suffix))

                    plt.clf()
                    plt.hist(monthly_5by5_n_grids.reshape(-1), bins = np.arange(-5, 100, 5), align = "left", log = True, rwidth = 0.5)
                    plt.axvline(x = (0.3 * daily_grid.shape[0]), color = "r")
                    plt.title("Total number of 1x1 daily grids in each 5x5 monthly grid")
                    plt.xlabel("Number of 1x1 daily grids")
                    plt.ylabel("Frequency (log scale)")
                    plt.savefig(settings.PLOT_LOCATION + "n_grids_5x5_monthly_from_daily_{}{:02d}_{}_{}.png".format(year, month, period, suffix))

                del daily_grid
                del monthly_5by5
                del n_obs_per_day
                del monthly_5by5_n_grids
                del monthly_5by5_n_obs
                gc.collect()

    return # do_gridding
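# Sketch of the coordinate discretisation in do_gridding(), assuming
# utils.make_index() behaves like the floor division below (the real helper,
# defined in utils.py, also handles the x100 integer encoding of its input).
# The grid sizes and sample points here are illustrative only.
def _sketch_latlon_index():
    import numpy as np
    delta_lat, delta_lon = 1, 1                    # 1x1 gridding step
    n_lats, n_lons = 180, 360
    lats = np.array([-89.9, 0.0, 45.3])
    lons = np.array([-179.9, 0.0, 120.7])
    lat_index = np.floor(lats / delta_lat).astype(int) + n_lats // 2
    lon_index = np.floor(lons / delta_lon).astype(int) + n_lons // 2
    # (0, 0) now corresponds to (-90, -180), matching the roll applied above
    return lat_index, lon_index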
def do_conversion(suffix = "relax", start_year = defaults.START_YEAR, end_year = defaults.END_YEAR, period = "all", doQC = False, doBC = False):
    '''
    Convert 3 hrlies to pentads 1x1

    First get the pentad average of the 3hrly values (so values at 0, 3, 6, ... averaged over 5 days),
    then get the average over the pentad.

    :param str suffix: "relax" or "strict" criteria
    :param int start_year: start year to process
    :param int end_year: end year to process
    :param str period: which period to do day/night/all?
    :param bool doQC: incorporate the QC flags or not
    :param bool doBC: work on the bias corrected data

    :returns:
    '''
    settings = set_paths_and_vars.set(doBC = doBC, doQC = doQC)

    # KW Added SUFFIX variable because all hourlies/dailies/monthlies now have suffix 'strict' (4/2 per daily/day-night)
    # or 'relax' (2/1 per daily/day-night)
    if suffix == "relax":
        N_OBS_OVER_DAYS = 1 # at least 1 obs at this 3 hr timestamp from 5 days in pentad
        N_OBS_OVER_PENTAD = 2
    elif suffix == "strict":
        N_OBS_OVER_DAYS = 2
        N_OBS_OVER_PENTAD = 4 # at least 4 timestamps (of 8) in pentad, could be 2 for local 'relax' setting

    N_OBS_PER_DAY = 24/DELTA_HOUR

    for year in np.arange(start_year, end_year + 1):

        all_pentads = np.ma.zeros([len(OBS_ORDER), 73, len(grid_lats), len(grid_lons)])
        all_pentads.mask = np.zeros([len(OBS_ORDER), 73, len(grid_lats), len(grid_lons)])

        # read in a year's worth of 3hrly data
        for v, var in enumerate(OBS_ORDER):
            # arrays too massive to process all variables at once
            print var.name

            var_3hrlys = read_data(settings, suffix, var.name, year, grid_lats, grid_lons, period, N_OBS_PER_DAY)

            # reshape to days x 3hrly obs (365(366),8,180,360)
            var_3hrlys = var_3hrlys.reshape(-1, N_OBS_PER_DAY, var_3hrlys.shape[1], var_3hrlys.shape[2])

            # process the leap-year if appropriate
            if calendar.isleap(year):
                var_3hrlys, incl_feb29th = process_february(var_3hrlys, doMask = True)
            else:
                assert var_3hrlys.shape[0] == 365

            # get pentadly values for each timestep (73,5,8,180,360)
            shape = var_3hrlys.shape
            var_3hrlys = var_3hrlys.reshape(-1, 5, shape[-3], shape[-2], shape[-1]) # n_pentads x days x hrs x lat x lon

            n_days_per_timestamp = np.ma.count(var_3hrlys, axis = 1) # n_pentads x hrs x lat x lon

            # get average at each timestamp across the pentad - so have N_OBS_PER_DAY averaged values per pentad
            if settings.doMedian:
                pentad_3hrly_grid = utils.bn_median(var_3hrlys, axis = 1) # n_pentads x hrs x lat x lon
            else:
                pentad_3hrly_grid = np.ma.mean(var_3hrlys, axis = 1) # n_pentads x hrs x lat x lon

            # mask where fewer than N_OBS_OVER_DAYS days have values
            pentad_3hrly_grid.mask[n_days_per_timestamp < N_OBS_OVER_DAYS] = True

            # clear up memory
            del var_3hrlys
            gc.collect()

            # the pentad containing Feb 29th is the 11th in the year
            # (KW: actually it's the 12th, so 11 in array speak, which is what you have done)
            if calendar.isleap(year):
                # overwrite this with the me(di)an of a 6-day pentad
                if settings.doMedian:
                    pentad_3hrly_grid[11, :, :, :] = utils.bn_median(incl_feb29th, axis = 0)
                else:
                    pentad_3hrly_grid[11, :, :, :] = np.ma.mean(incl_feb29th, axis = 0)

                feb_n_days_per_timestamp = np.ma.count(incl_feb29th, axis = 0)
                pentad_3hrly_grid.mask[11, :, :, :][feb_n_days_per_timestamp < N_OBS_OVER_DAYS] = True
                n_days_per_timestamp[11, :, :, :] = feb_n_days_per_timestamp

                print "processed Feb 29th"

            if settings.plots and v == 0:
                import matplotlib.pyplot as plt
                plt.clf()
                plt.hist(n_days_per_timestamp.reshape(-1), bins = np.arange(-1,7), align = "left", log = True, rwidth=0.5)
                plt.axvline(x = N_OBS_OVER_DAYS-0.5, color = "r")
                plt.title("Number of days with obs at each 3hrly timestamp (over entire year)")
                plt.xlabel("Number of days (max = 5)")
                plt.ylabel("Frequency (log scale)")
                plt.savefig(settings.PLOT_LOCATION + "pentads_n_days_{}_{}_{}.png".format(year, period, suffix))

            # get single pentad values
            n_hrs_per_pentad = np.ma.count(pentad_3hrly_grid, axis = 1) # the number of pentad-hours present in each pentad
            n_grids_per_pentad = np.sum(n_days_per_timestamp, axis = 1) # the number of 3hrly 1x1 grids included per pentad 1x1

            # average across the N_OBS_PER_DAY timestamps to get a single value per pentad
            if settings.doMedian:
                pentad_grid = utils.bn_median(pentad_3hrly_grid, axis = 1)
            else:
                pentad_grid = np.ma.mean(pentad_3hrly_grid, axis = 1)

            if period == "all":
                # KW are you sure this should be n_hrs_per_pentad and not n_grids_per_pentad here? I think it should
                pentad_grid.mask[n_hrs_per_pentad < N_OBS_OVER_PENTAD] = True # mask where fewer than N_OBS_OVER_PENTAD hours have values
            else:
                # KW are you sure this should be n_hrs_per_pentad and not n_grids_per_pentad here? I think it should
                pentad_grid.mask[n_hrs_per_pentad < (N_OBS_OVER_PENTAD/2.)] = True # mask where fewer than N_OBS_OVER_PENTAD/2 hours have values

            all_pentads[v, :, :, :] = pentad_grid

            # diagnostic plots of obs/grids per pentad
            if settings.plots and v == 0:
                plt.clf()
                plt.hist(n_hrs_per_pentad.reshape(-1), bins = np.arange(-1,10), align = "left", log = True, rwidth=0.5)
                if period == "all":
                    plt.axvline(x = N_OBS_OVER_PENTAD-0.5, color = "r")
                else:
                    plt.axvline(x = (N_OBS_OVER_PENTAD/2.)-0.5, color = "r")
                plt.title("Number of hrs with obs in each pentad (over entire year)")
                plt.xlabel("Number of hrs (max = 8)")
                plt.ylabel("Frequency (log scale)")
                plt.savefig(settings.PLOT_LOCATION + "pentads_n_hrs_{}_{}_{}.png".format(year, period, suffix))

            # clear up memory
            del pentad_3hrly_grid
            del pentad_grid
            gc.collect()

        # done all main variables - now for the number of observations
        print "n_obs"
        n_obs = read_data(settings, suffix, "n_obs", year, grid_lats, grid_lons, period, N_OBS_PER_DAY)
        # KW so we've gone from 8*365 hrs, lats, lons to 365, 8, lats, lons
        n_obs = n_obs.reshape(-1, N_OBS_PER_DAY, n_obs.shape[1], n_obs.shape[2])

        if calendar.isleap(year):
            n_obs, incl_feb29th = process_february(n_obs, doMask = True)
        else:
            assert n_obs.shape[0] == 365

        shape = n_obs.shape
        # KW so we're now at pentads, 5 days, 8 hours, lats, lons
        n_obs = n_obs.reshape(-1, 5, shape[-3], shape[-2], shape[-1]) # pentads x days x hours x lat x lon

        # KW This should sum over the 5 days, leaving pentads, 8 hrs, lats, lons
        # n_obs has -1 as missing data!!! So a plain sum will not work properly -
        # set up fill_value as -1
        n_obs.fill_value = -1
        n_obs_per_3hrly_pentad = np.ma.sum(n_obs, axis = 1)
        n_obs_per_3hrly_pentad.fill_value = -1

        if calendar.isleap(year):
            n_obs_per_3hrly_pentad[11, :, :, :] = np.ma.sum(incl_feb29th, axis = 0)

        n_obs_per_pentad = np.ma.sum(n_obs_per_3hrly_pentad, axis = 1)

        # and write out
        times = utils.TimeVar("time", "time since 1/1/{} in hours".format(year), "hours", "time")
        times.data = np.arange(0, all_pentads.shape[1]) * 5 * 24

        out_filename = settings.DATA_LOCATION + settings.OUTROOT + "_1x1_pentad_from_3hrly_{}_{}_{}.nc".format(year, period, suffix)

        utils.netcdf_write(out_filename, all_pentads, n_grids_per_pentad, n_obs_per_pentad, OBS_ORDER, grid_lats, grid_lons, times, frequency = "P")

    return # do_conversion
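
# --------------------------------------------------------------------------
# Illustrative sketch only, never called: the pentad logic of do_conversion
# in miniature. A year of 3hrly fields (365 days x 8 timestamps) reshapes to
# (73 pentads, 5 days, 8 timestamps); the mean over the day axis gives one
# value per timestamp per pentad (masked where fewer than N_OBS_OVER_DAYS
# days contribute), and the mean over the timestamp axis gives the pentad
# value (masked where fewer than N_OBS_OVER_PENTAD timestamps survive).
# The helper below is hypothetical, works on a single grid box, ignores leap
# years and the 6-day Feb pentad, and defaults to the 'relax' thresholds.
def _sketch_pentads_from_3hrly(values, n_obs_over_days = 1, n_obs_over_pentad = 2):
    '''values: masked array of shape (365, 8) for one grid box
       returns: masked array of shape (73,) of pentad means'''
    import numpy as np
    pentads = values.reshape(73, 5, 8)                  # pentad x day x timestamp
    n_days = np.ma.count(pentads, axis = 1)             # days present per timestamp
    per_timestamp = np.ma.mean(pentads, axis = 1)       # (73, 8)
    per_timestamp.mask = np.ma.getmaskarray(per_timestamp) | (n_days < n_obs_over_days)
    n_hrs = np.ma.count(per_timestamp, axis = 1)        # timestamps present per pentad
    pentad_means = np.ma.mean(per_timestamp, axis = 1)  # (73,)
    pentad_means.mask = np.ma.getmaskarray(pentad_means) | (n_hrs < n_obs_over_pentad)
    return pentad_means
# e.g. _sketch_pentads_from_3hrly(np.ma.masked_less(np.random.rand(365, 8), 0.3))
# --------------------------------------------------------------------------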