def _create_var2d_provider(self, var2d):
    """Create MULE provider for var2d. Also, rotate the data to UKMO
    convention."""
    var2d_for_surf = np.roll(var2d, self.grid["i_pm"], axis=1)
    var2d_provider = mule.ArrayDataProvider(var2d_for_surf)
    del var2d_for_surf
    return var2d_provider
def _minimal_valid_field(num_cols, num_rows, start_lon, start_lat,
                         col_spacing, row_spacing):
    """
    Return a basic field object, populating the bare minimum header
    inputs required for validation.
    """
    fld = mule.Field3.empty()
    fld.lbrel = 3
    fld.raw[1] = 1
    fld.lbext = 0
    fld.lbnpt, fld.lbrow = num_cols, num_rows
    fld.bdx, fld.bdy = col_spacing, row_spacing
    # Note: the lookup header grid origin holds the "0th" point, not
    # the first point (as in the file object grid origin)
    fld.bzx = start_lon - fld.bdx
    fld.bzy = start_lat - fld.bdy
    # Attach a basic range array (reshaped) to be the data
    data = np.arange(fld.lbnpt * fld.lbrow).reshape(fld.lbrow, fld.lbnpt)
    provider = mule.ArrayDataProvider(data)
    fld.set_data_provider(provider)
    return fld
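
# Illustrative usage sketch for _minimal_valid_field(); not part of the
# original module. The grid values below are hypothetical placeholders for a
# coarse global grid.
def _example_minimal_valid_field():
    fld = _minimal_valid_field(
        num_cols=96, num_rows=72,
        start_lon=0.0, start_lat=-88.75,
        col_spacing=3.75, row_spacing=2.5)
    # The attached range data should come back with the requested shape
    assert fld.get_data().shape == (72, 96)
    return fld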
def vertical_interpolate(infile, outfile, orogfile, vertlevs):
    """
    Perform a vertical interpolation of ancil file 'infile', using the
    level definition namelist 'vertlevs'

    Args:
        infile (string): Path to input UM ancil file
        outfile (string): Path to output UM ancil file
        orogfile (string): Path to UM orography for true level calculations
        vertlevs (string): Path to UM vertical namelist file for target levels
    """
    ancil = mule.AncilFile.from_file(infile)

    def categorise_fields(m):
        df = pandas.DataFrame({'field': m.fields})
        df['year'] = df['field'].apply(lambda f: f.lbyr)
        df['month'] = df['field'].apply(lambda f: f.lbmon)
        df['day'] = df['field'].apply(lambda f: f.lbdat)
        df['hour'] = df['field'].apply(lambda f: f.lbhr)
        df['minute'] = df['field'].apply(lambda f: f.lbmin)
        df['second'] = df['field'].apply(lambda f: f.lbsec)
        df['stash'] = df['field'].apply(lambda f: f.lbuser4)
        df['vertical_type'] = df['field'].apply(lambda f: f.lbvc)
        df['level'] = df['field'].apply(lambda f: f.lblev)
        df['pseudo'] = df['field'].apply(lambda f: f.lbuser5)
        #df['bulev'] = df['field'].apply(lambda f: f.bulev)
        df['blev'] = df['field'].apply(lambda f: f.blev)
        df['brlev'] = df['field'].apply(lambda f: f.brlev)
        #df['bhulev'] = df['field'].apply(lambda f: f.bhulev)
        df['bhlev'] = df['field'].apply(lambda f: f.bhlev)
        df['bhrlev'] = df['field'].apply(lambda f: f.bhrlev)
        return df

    # Categorise the 2d slices in the input file
    df = categorise_fields(ancil)

    # Get the orography
    orog_file = mule.AncilFile.from_file(orogfile)
    orog = orog_file.fields[0].get_data()

    levtype = 'theta'
    target_levels = f90nml.read(vertlevs)['VERTLEVS']

    if levtype == 'rho':
        # Rho levels
        eta = numpy.array(target_levels['eta_rho'])
        const_lev = target_levels['first_constant_r_rho_level'] - 1
    if levtype == 'theta':
        # Theta levels
        eta = numpy.array(target_levels['eta_theta'])
        const_lev = target_levels['first_constant_r_rho_level'] - 1

    # True height of the target levels
    target_Zsea = target_levels['z_top_of_model'] * eta
    target_C = (1 - eta / eta[const_lev])**2
    target_C[const_lev:] = 0
    target_Z = (target_Zsea[:, numpy.newaxis, numpy.newaxis] +
                numpy.multiply.outer(target_C, orog))

    ancil_out = ancil.copy()

    # Group the 2d slices with the same field and time value together
    for name, g in df.groupby(
            ['year', 'month', 'day', 'hour', 'minute', 'second', 'stash']):
        print("%04d%02d%02dT%02d:%02d:%02d STASH %d" % name)

        # Stack the slices into a 3d array
        cube = numpy.stack(g['field'].apply(lambda f: f.get_data()))

        # True height of each position (convert the pandas columns to plain
        # arrays so the numpy-style newaxis indexing below works)
        Zsea = g['blev'].to_numpy()
        C = g['bhlev'].to_numpy()
        Z = (Zsea[:, numpy.newaxis, numpy.newaxis] +
             numpy.multiply.outer(C, orog))

        # Interpolate from the source true height to the target true height
        new_cube = stratify.interpolate(target_Z, Z, cube, axis=0,
                                        extrapolation='nearest')

        for level in range(1, new_cube.shape[0]):
            f = g.iloc[0].at['field'].copy()
            f.lblev = level + 1
            f.blev = target_Zsea[level]
            f.brlev = -1073741824
            f.bhlev = target_C[level]
            f.bhrlev = -1073741824
            f.set_data_provider(mule.ArrayDataProvider(new_cube[level, :, :]))
            ancil_out.fields.append(f)

    ancil_out.to_file(outfile)
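
# A minimal driver sketch for vertical_interpolate(); the paths below are
# hypothetical placeholders and assume MULE-readable UM ancillary files plus
# a vertical levels namelist containing a standard &VERTLEVS group.
def _example_vertical_interpolate():
    vertical_interpolate(
        infile='input.ancil',            # hypothetical input ancillary
        outfile='output.ancil',          # hypothetical output path
        orogfile='orography.ancil',      # hypothetical orography ancillary
        vertlevs='vertlevs_L85')         # hypothetical levels namelist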
def create_surface_ancillary(input_ds, stash_map):
    """Create a surface-level UM ancillary file

    Args:
        input_ds: Source dataset/dataarray
        stash_map: Mapping of variable name from `input_ds` to STASH code

    Returns:
        :obj:`mule.AncilFile` containing ancillary file data, write out
        with ``.to_file()``

    Example:
        ::

            input_ds = xarray.open_mfdataset(files, engine='pynio')
            stash_map = {'CI_GDS0_SFC': 31, 'SSTK_GDS0_SFC': 507,}

            ancil = create_surface_ancillary(input_ds, stash_map)
            ancil.to_file('sstice.ancil')

    Todo:
        * Assumes Gregorian calendar
        * Assumes sub-daily frequency
        * Does not compress output
    """
    time = identify_time(input_ds)
    lat, lon = identify_lat_lon(input_ds)

    tstep = (time[1] - time[0]) / numpy.timedelta64(1, 's')

    template = {
        'fixed_length_header': {
            'sub_model': 1,          # Atmosphere
            'dataset_type': 4,       # Ancillary
            'horiz_grid_type': 0,    # Global
            'calendar': 1,           # Gregorian
            'grid_staggering': 6,    # EndGame
            'time_type': 1,          # Time series
            'model_version': 1006,   # UM 10.6

            # Start time
            't1_year': time.dt.year.values[0],
            't1_month': time.dt.month.values[0],
            't1_day': time.dt.day.values[0],
            't1_hour': time.dt.hour.values[0],
            't1_minute': time.dt.minute.values[0],
            't1_second': time.dt.second.values[0],

            # End time
            't2_year': time.dt.year.values[-1],
            't2_month': time.dt.month.values[-1],
            't2_day': time.dt.day.values[-1],
            't2_hour': time.dt.hour.values[-1],
            't2_minute': time.dt.minute.values[-1],
            't2_second': time.dt.second.values[-1],

            # Frequency (must be sub-daily)
            't3_year': 0,
            't3_month': 0,
            't3_day': 0,
            't3_hour': tstep / 3600,
            't3_minute': tstep % 3600 / 60,
            't3_second': tstep % 60,
        },
        'integer_constants': {
            'num_times': time.size,
            'num_cols': lon.size,
            'num_rows': lat.size,
            'num_levels': 1,
            'num_field_types': len(stash_map),
        },
        'real_constants': {
            'start_lat': lat.values[0] + (lat.values[1] - lat.values[0]) / 2.0,
            'row_spacing': lat.values[1] - lat.values[0],
            'start_lon': lon.values[0] + (lon.values[1] - lon.values[0]) / 2.0,
            'col_spacing': lon.values[1] - lon.values[0],
            'north_pole_lat': 90,
            'north_pole_lon': 0,
        },
    }
    ancil = mule.AncilFile.from_template(template)

    # UM missing data magic value
    MDI = -1073741824.0

    for var, stash in stash_map.items():
        # Mask out NaNs with MDI
        var_data = xarray.where(dask.array.isnan(input_ds[var]), MDI,
                                input_ds[var])

        for t in var_data[time.name]:
            field = mule.Field3.empty()

            field.lbyr = t.dt.year.values
            field.lbmon = t.dt.month.values
            field.lbdat = t.dt.day.values
            field.lbhr = t.dt.hour.values
            field.lbmin = t.dt.minute.values
            field.lbsec = t.dt.second.values

            field.lbtime = 1       # Instantaneous, Gregorian calendar
            field.lbcode = 1       # Regular lat-lon grid
            field.lbhem = 0        # Global
            field.lbrow = ancil.integer_constants.num_rows
            field.lbnpt = ancil.integer_constants.num_cols
            field.lbpack = 0       # No packing
            field.lbrel = 3        # UM 8.1 or later
            field.lbvc = 129       # Surface field
            field.lbuser1 = 1      # Real data
            field.lbuser4 = stash  # STASH code
            field.lbuser7 = 1      # Atmosphere model

            field.bplat = ancil.real_constants.north_pole_lat
            field.bplon = ancil.real_constants.north_pole_lon
            field.bdx = ancil.real_constants.col_spacing
            field.bdy = ancil.real_constants.row_spacing
            field.bzx = ancil.real_constants.start_lon - field.bdx / 2.0
            field.bzy = ancil.real_constants.start_lat - field.bdy / 2.0
            field.bmdi = MDI
            field.bmks = 1.0

            field.set_data_provider(
                mule.ArrayDataProvider(var_data.sel({time.name: t})))

            ancil.fields.append(field)

    return ancil
def gen_pert_field(clim_fields, alpha, ens_member, date):
    """
    Generate an SST perturbation field from a set of climatological fields
    and some values to setup a random number generator.

    Args:
        * clim_fields:
            Array of 12 field objects giving the SST (lbuser4=24) for each
            month of the year.
        * alpha:
            Factor used by algorithm (higher values lead to more extreme
            perturbations).
        * ens_member:
            Ensemble member number - used in random generator.
        * date:
            Datetime object giving the desired date for the perturbed field.

    Returns:
        * pert_field:
            A new field object based on the first climatology field but with
            its data replaced by the new perturbed SST field.
    """
    # Climatology should be a list of 12 field objects giving the SSTs
    if len(clim_fields) != 12:
        msg = (
            "Incorrect number of climatology fields; expected 12, found {0}")
        raise ValueError(msg.format(len(clim_fields)))

    # Check that the fields are appropriate releases and are SSTs
    for ifld, field in enumerate(clim_fields):
        if field.lbrel not in (2, 3):
            msg = "Climatology field {0} has invalid header release number"
            raise ValueError(msg.format(ifld + 1))
        if field.lbuser4 != 24:
            msg = "Climatology field {0} is not an SST field"
            raise ValueError(msg.format(ifld + 1))

    # Sort them into month order if they aren't already
    def month_sort(field):
        return field.lbmon
    clim_fields = sorted(clim_fields, key=month_sort)

    # The SST pert library requires the data from the fields as a big array,
    # so create it here:
    clim_array = np.empty((clim_fields[0].lbrow, clim_fields[0].lbnpt, 12))
    for ifield, field in enumerate(clim_fields):
        clim_array[:, :, ifield] = field.get_data()

    # The library also requires some of the other arguments be packed into
    # an array similar to the UM's "dt" array:
    dt = np.array([
        date.year,
        date.month,
        date.day,
        0,              # This element is a UTC offset and always 0
        date.hour + 1,  # Add 1 here because fieldcalc did it
        date.minute,
        ens_member,
        ens_member + 100
    ])

    # Call the library
    pert_data = sstpert(alpha, dt, clim_array)

    # Create a copy of the first field to store the new output
    pert_field = clim_fields[0].copy()
    pert_field.set_data_provider(mule.ArrayDataProvider(pert_data))

    # Set the field headers from the given date
    pert_field.lbyr = pert_field.lbyrd = date.year
    pert_field.lbmon = pert_field.lbmond = date.month
    pert_field.lbdat = pert_field.lbdatd = date.day
    pert_field.lbhr = pert_field.lbhrd = date.hour
    pert_field.lbmin = pert_field.lbmind = date.minute
    pert_field.raw[6] = pert_field.raw[12] = 0
    pert_field.lbft = 0
    pert_field.lbpack = 0

    return pert_field
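
# Sketch of driving gen_pert_field() end to end; not part of the original
# module. 'sst_clim.ancil' and 'sst_pert.ancil' are hypothetical paths, and
# the climatology file is assumed to hold 12 monthly SST fields (STASH 24).
import datetime

def _example_gen_pert_field():
    clim = mule.AncilFile.from_file('sst_clim.ancil')
    sst_fields = [f for f in clim.fields if f.lbuser4 == 24]
    pert = gen_pert_field(sst_fields, alpha=0.5, ens_member=3,
                          date=datetime.datetime(2020, 1, 15, 12))
    out = clim.copy()  # header-only copy; fields list starts empty
    out.fields.append(pert)
    out.to_file('sst_pert.ancil')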
def create_surf_file(self, file_type, varlist, surffile):
    # Create template for SURF file
    self._create_surf_template(file_type, varlist)

    # Create SURF object
    surf = mule.AncilFile.from_template(self.template)

    # The UM RECON preprocessor does not work if the SURF file (other
    # than _glu_smc) has a LEVEL_DEPENDENT_CONSTANTS section. However,
    # the MULE library executes validation code that requires the
    # LEVEL_DEPENDENT_CONSTANTS section to exist. The current
    # workaround is to disable the validation for this specific SURF
    # file, and remove the troublesome section.
    if file_type in ["_glu_snow", "_glu_ice", "_glu_sst"]:
        def dummy_validate(*args, **kwargs):
            pass
        surf.validate = dummy_validate
        surf.level_dependent_constants = None

    # Create Field3 object for each variable
    for key in varlist:

        # See if the varname and source are recognized
        if key not in varIds:
            print("WARN, %s not recognized!" % key)
            continue

        # See if the source is recognized
        infile_type = key.split(":")[1]
        if infile_type not in ["LDT", "LVT"]:
            print("ERROR, invalid infile type %s" % infile_type)
            print("Found in %s" % key)
            print("Internal error, aborting...")
            sys.exit(1)

        # Trim the varname to exclude the source
        varid = key.split(":")[0]

        # Attempt to retrieve the variable from the appropriate
        # netCDF file.
        if infile_type == "LDT":
            try:
                var = self.ncid_ldt.variables[varid]
            except KeyError:
                print("WARN, %s not available in LDT file!" % varid)
                continue
        elif infile_type == "LVT":
            try:
                var = self.ncid_lvt.variables[varid]
            except KeyError:
                print("WARN, %s not available in LVT file!" % varid)
                continue

        # Save the "missing data" value for this netCDF variable
        if infile_type == "LVT":
            fillValue = var._FillValue
        elif infile_type == "LDT":
            fillValue = var.missing_value

        # At this point we have a reference to the variable. Copy to
        # a NumPy array, and record the number of vertical levels.
        if var.ndim == 2:
            var = var[:, :]
            nlev = 1
        elif var.ndim == 3:
            var = var[:, :, :]
            nlev = var.shape[0]
        else:
            print("ERROR, unsupported array with %s dimensions!" % var.ndim)
            sys.exit(1)

        # Loop through each level.
        for ilev in range(0, nlev):

            # In 2D case, work with the whole array.
            if var.ndim == 2:
                var2d = var[:, :]
                lblev = 9999  # Indicates surface level
            # In 3D case, pull out the current vertical level as
            # a 2D array.
            else:
                var2d = var[ilev, :, :]
                lblev = ilev + 1  # Use 1-based indexing

            # MULE doesn't like masked arrays, so pull the raw
            # data out in this case.
            if isinstance(var2d, np.ma.core.MaskedArray):
                var2d = var2d.data

            # Update the missing value to match that used in SURF
            var2d = np.where(var2d == fillValue, mule._REAL_MDI, var2d)

            # EMK...For SoilMoist, convert from m3/m3 to kg m-2.
            if varid == "SoilMoist_inst":
                soil_layer_thicknesses = \
                    self.ncid_lvt.getncattr("SOIL_LAYER_THICKNESSES")
                dzsoil = soil_layer_thicknesses[ilev] * 0.01  # cm to m
                var2d = np.where(var2d == mule._REAL_MDI,
                                 mule._REAL_MDI,
                                 var2d * 1000 * dzsoil)

            # Rotate the field to match the 0:360 longitudinal convention
            # used by GALWEM. Then create a "provider" of the data.
            var2d_for_surf = np.roll(var2d, self.i_pm, axis=1)
            var2d_provider = mule.ArrayDataProvider(var2d_for_surf)

            # Now add the field to the SURF object.
            print("var: %s, ilev: %s" % (key, ilev))
            surf = self._add_field(key, var2d_for_surf, lblev, ilev,
                                   nlev, var2d_provider, surf)

    # All fields have been added to the SURF object. Write to file.
    surf.to_file(surffile)