def shuffle(lat: int, lon: int, date_index_sample: list):
    """! Restore spatial correlations inside a forecast (inside each ensemble member) using the
    Schaake shuffle. Save results to individual netCDF grid files.
    @param lat: latitude index
    @param lon: longitude index
    @param date_index_sample: list of date indices
    """
    observed = xr.open_dataset(settings.SMIPS_AGG, decode_times=False)
    fc = xr.open_dataset(settings.forecast_agg(settings.placeholder_date))
    lats, lons = source_cube.get_lat_lon_values()
    obs_pre_shuffle = np.zeros((9, 1000))
    coord_observed = observed.blended_precipitation.values[:, lat, lon]
    fc_pre_shuffle = fc.forecast_value.values[lat, lon]
    # Fill the SMIPS observation array: one value per sampled date and lead time
    for i in range(len(date_index_sample)):
        for lead in range(9):
            obs_pre_shuffle[lead, i] = coord_observed[date_index_sample[i] + lead]
    for lead in range(9):
        fc_to_shuffle = fc_pre_shuffle[lead]
        obs_to_shuffle = obs_pre_shuffle[lead]
        # Pass the SMIPS observations and the forecast ensemble to the shuffle function
        shuffled_fc = shuffle_random_ties(fc_to_shuffle, obs_to_shuffle)
        # Save the shuffled forecast to its per-grid netCDF cube
        forecast_cube.add_to_netcdf_cube(
            settings.shuffled_forecast_filename(settings.placeholder_date, lats[lat], lons[lon]),
            lead, shuffled_fc)
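# The rank-matching core of the Schaake shuffle lives in shuffle_random_ties(),
# defined elsewhere. A minimal sketch of that step, assuming a 1-D forecast
# ensemble and an equally sized observation sample; the function name and the
# random tie-breaking detail below are illustrative, not the actual implementation.
def _schaake_shuffle_sketch(fc_ensemble, obs_sample):
    rng = np.random.default_rng()
    # Rank the observations, breaking ties randomly: lexsort treats its last
    # key as the primary sort key, so equal observations fall back to the
    # random secondary key.
    order = np.lexsort((rng.random(obs_sample.shape), obs_sample))
    ranks = np.empty_like(order)
    ranks[order] = np.arange(len(obs_sample))
    # Reorder the sorted forecast values so they inherit the observations'
    # rank structure, which is what restores the spatial correlation.
    return np.sort(fc_ensemble)[ranks]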
def create_parameter_files():
    """! Generate forecast parameters and save to netCDF files. """
    lats, lons = source_cube.get_lat_lon_values()
    for lat in lats:
        if bot_lat <= lat <= top_lat:
            for lon in lons:
                if left_lon <= lon <= right_lon:
                    try:
                        parameter_cube.generate_forecast_parameters(lat, lon)
                    except ValueError:
                        # Coordinates don't have data or don't have a recognized timezone
                        continue
    parameter_cube.aggregate_netcdf()
def create_shuffled_forecasts():
    """! Create shuffled forecasts for all coordinates and save to netCDF. """
    # Created once and reused unchanged, so every grid point is shuffled
    # against the same sample of historical dates
    date_index_sample = source_cube.sample_date_indices()
    lats, lons = source_cube.get_lat_lon_values()
    for lat in range(len(lats)):
        if bot_lat <= lats[lat] <= top_lat:
            for lon in range(len(lons)):
                if left_lon <= lons[lon] <= right_lon:
                    transform.shuffle(lat, lon, date_index_sample)
    forecast_cube.aggregate_netcdf(settings.placeholder_date, settings.FORECAST_SHUFFLE_PATH)
def create_forecast_files(date: datetime.date):
    """! Read the ACCESS-G data for a given forecast date, transform it using the predictor
    transformation parameters saved to netCDF, and save the resulting forecast to netCDF,
    for all lead times and all grid points.
    @param date: date to forecast
    """
    lats, lons = source_cube.get_lat_lon_values()
    for lat in lats:
        if bot_lat <= lat <= top_lat:
            for lon in lons:
                if left_lon <= lon <= right_lon:
                    try:
                        grid_forecast(date, lat, lon)
                    except ValueError:
                        # Coordinates don't have data or don't have a recognized timezone
                        continue
    forecast_cube.aggregate_netcdf(date)
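# A hypothetical end-to-end driver (not part of the original code), assuming the
# functions above are available together; it shows the pipeline order they imply:
# fit the transformation parameters once, forecast a date, then restore spatial
# correlation with the Schaake shuffle.
def run_pipeline_example(date: datetime.date):
    create_parameter_files()        # fit and aggregate per-grid parameters
    create_forecast_files(date)     # transform ACCESS-G data into a forecast
    create_shuffled_forecasts()     # shuffle and aggregate the final forecast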
def create_cube(cubepathname, lat=None, lon=None):
    """! Create a netCDF cube for normalised and transformed grid parameters.
    Lat and lon are optional because they are not needed for the aggregate file;
    they are required when creating a single-grid cube.
    """
    if os.path.exists(cubepathname):
        os.remove(cubepathname)
    outcube = Dataset(cubepathname, mode='w', format='NETCDF4')
    outcube.history = 'Created ' + datetime.datetime.now().isoformat()

    outcube.createDimension('np_set', 2000)
    outcube.createDimension('tp_set', 2)
    np_set = outcube.createVariable('np_set', 'u4', 'np_set')
    tp_set = outcube.createVariable('tp_set', 'u4', 'tp_set')
    np_set.setncatts({"long_name": "normalised parameter set"})
    tp_set.setncatts({"long_name": "transformed parameter set"})

    outcube.createDimension('np_types', 5)
    outcube.createDimension('tp_types', 3)
    np_types = outcube.createVariable('np_types', 'u4', 'np_types')
    tp_types = outcube.createVariable('tp_types', 'u4', 'tp_types')
    np_types.setncatts({"long_name": "normalised parameter types: mu1, mu2, sigma1, sigma2, scaling_factor"})
    tp_types.setncatts({"long_name": "transformed parameter types: lambda, epsilon, scaling_factor"})

    outcube.createDimension('lead_time', 9)
    lead = outcube.createVariable('lead_time', 'u4', 'lead_time')
    lead[:] = range(9)
    np_set[:] = range(2000)
    tp_set[:] = range(2)
    np_types[:] = range(5)
    tp_types[:] = range(3)

    if 'aggregate' in cubepathname:
        lats, lons = get_lat_lon_values()
        rows = len(lats)
        cols = len(lons)
        outcube.description = 'Normal and transformed parameters for entire grid.'
        outcube.createDimension('lon', cols)
        outcube.createDimension('lat', rows)
        ylat = outcube.createVariable('lat', 'f4', 'lat')
        xlon = outcube.createVariable('lon', 'f4', 'lon')
        ylat[:] = lats
        xlon[:] = lons
        # Data variables
        outcube.createVariable('n_parameters', 'f8',
                               ('lat', 'lon', 'lead_time', 'np_set', 'np_types'),
                               least_significant_digit=3, fill_value=-9999.0)
        outcube.createVariable('t_parameters', 'f8',
                               ('lat', 'lon', 'lead_time', 'tp_set', 'tp_types'),
                               least_significant_digit=3, fill_value=-9999.0)
    else:
        if lat is None or lon is None:
            raise ValueError('Need to run with lat/lon parameters')
        outcube.description = 'Normal and transformed parameters for grid at: {}, {}'.format(lat, lon)
        # Data variables
        outcube.createVariable('n_parameters', 'f8', ('lead_time', 'np_set', 'np_types'),
                               least_significant_digit=3, fill_value=-9999.0)
        outcube.createVariable('t_parameters', 'f8', ('lead_time', 'tp_set', 'tp_types'),
                               least_significant_digit=3, fill_value=-9999.0)
        ylat = outcube.createVariable('lat', 'f4')
        xlon = outcube.createVariable('lon', 'f4')
        ylat[:] = lat
        xlon[:] = lon

    # Add attributes
    xlon.setncatts({"long_name": "longitude", "units": "degrees_east", "proj": "longlat", "datum": "WGS84"})
    ylat.setncatts({"long_name": "latitude", "units": "degrees_north", "proj": "longlat", "datum": "WGS84"})
    outcube.close()
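# Usage sketch (hypothetical path): build a single-grid parameter cube and read
# back the variable layout written by create_cube() above.
def _inspect_parameter_cube_example():
    create_cube('params_-35.0_149.0.nc', lat=-35.0, lon=149.0)
    with Dataset('params_-35.0_149.0.nc') as nc:
        print(nc['n_parameters'].shape)  # (lead_time, np_set, np_types) == (9, 2000, 5)
        print(nc['t_parameters'].shape)  # (lead_time, tp_set, tp_types) == (9, 2, 3)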
def create_cube(cubepathname, date=None, lat=None, lon=None):
    """! Create a netCDF cube for post-processed forecast values.
    Lat and lon are optional because they are not needed for the aggregate file.
    """
    if os.path.exists(cubepathname):
        os.remove(cubepathname)
    outcube = Dataset(cubepathname, mode='w', format='NETCDF4')
    outcube.history = 'Created ' + datetime.datetime.now().isoformat()

    outcube.createDimension('ensemble_member', 1000)
    ens = outcube.createVariable('ensemble_member', 'u4', 'ensemble_member')
    outcube.createDimension('lead_time', 9)
    lead = outcube.createVariable('lead_time', 'u4', 'lead_time')

    if 'aggregate' in cubepathname:
        lats, lons = get_lat_lon_values()
        rows = len(lats)
        cols = len(lons)
        outcube.createDimension('lon', cols)
        outcube.createDimension('lat', rows)
        ylat = outcube.createVariable('lat', 'f4', 'lat')
        xlon = outcube.createVariable('lon', 'f4', 'lon')
        ylat[:] = lats
        xlon[:] = lons
        outcube.createVariable('forecast_value', 'f8', ('lat', 'lon', 'lead_time', 'ensemble_member'),
                               least_significant_digit=3, fill_value=-9999.0)
        outcube.description = 'Post-processed forecast for ' + str(date)
    else:
        if lat is None or lon is None:
            raise ValueError('Need to run with lat/lon parameters')
        ylat = outcube.createVariable('lat', 'f4')
        xlon = outcube.createVariable('lon', 'f4')
        ylat[:] = lat
        xlon[:] = lon
        outcube.createVariable('forecast_value', 'f8', ('lead_time', 'ensemble_member'),
                               least_significant_digit=3, fill_value=-9999.0)
        outcube.description = 'Post-processed forecast for grid at: {}, {} on {}'.format(lat, lon, date)

    # Add attributes
    xlon.setncatts({"long_name": "longitude", "units": "degrees_east", "proj": "longlat", "datum": "WGS84"})
    ylat.setncatts({"long_name": "latitude", "units": "degrees_north", "proj": "longlat", "datum": "WGS84"})
    lead[:] = range(9)
    ens[:] = range(1000)
    outcube.close()
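# Usage sketch (hypothetical path and date): a single-grid forecast cube holds a
# (lead_time, ensemble_member) == (9, 1000) array; the aggregate cube prepends
# (lat, lon) dimensions.
def _inspect_forecast_cube_example():
    create_cube('fc_-35.0_149.0.nc', date=datetime.date(2021, 1, 1), lat=-35.0, lon=149.0)
    with Dataset('fc_-35.0_149.0.nc') as nc:
        print(nc['forecast_value'].shape)  # (9, 1000)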