def get_unit_convert():
    """
    extension: get unit conversion dictionary for sensitivity to each days emissions
    input: None
    output: dict ('units.<YYYYMMDD>': np.ndarray( shape_of( template.sense_emis ) )

    notes: SensitivityData.emis units = CF/(ppm/s)
           PhysicalAdjointData.emis units = CF/(mol/(s*m^2))

           For every date in dt.get_datelist() the DENSA_J variable of that
           day's met_cro_3d file is interpolated onto the timesteps of
           template.sense_emis and combined with the layer thickness
           (differences of the VGLVLS attribute) into one array per day.

    raises: AssertionError if the met file and template.sense_emis
            timesteps are incompatible.
    """
    global unit_key

    #physical constants:
    #molar weight of dry air (precision matches cmaq)
    mwair = 28.9628
    #convert proportion to ppm
    ppm_scale = 1E6
    #convert g to kg
    kg_scale = 1E-3

    unit_dict = {}
    #all spcs have same shape, get from 1st
    tmp_spc = ncf.get_attr(template.sense_emis, 'VAR-LIST').split()[0]
    target_shape = ncf.get_variable(template.sense_emis, tmp_spc)[:].shape

    #layer thickness constant between files
    lay_sigma = list(ncf.get_attr(template.sense_emis, 'VGLVLS'))
    #layer thickness measured in scaled pressure units
    lay_diff = [
        lower - upper for lower, upper in zip(lay_sigma[:-1], lay_sigma[1:])
    ]
    #keep the layer count as a plain int: once reshaped to (1, nlay, 1, 1)
    #len() of the array is always 1, which would cut DENSA_J to 1 layer.
    nlay = len(lay_diff)
    lay_thick = np.array(lay_diff).reshape((1, nlay, 1, 1))

    for date in dt.get_datelist():
        met_file = dt.replace_date(cmaq_config.met_cro_3d, date)
        #slice off any extra layers above area of interest
        rhoj = ncf.get_variable(met_file, 'DENSA_J')[:, :nlay, ...]

        #assert timesteps are compatible
        target_intervals = target_shape[0] - 1
        met_intervals = rhoj.shape[0] - 1
        assert target_intervals >= met_intervals, 'incompatible timesteps'
        assert target_intervals % met_intervals == 0, 'incompatible timesteps'
        reps = target_intervals // met_intervals

        #each met interval is split into `reps` sub-steps; sub-step r takes
        #the linear blend of the interval's end points at its mid-point.
        rhoj_interp = np.zeros(target_shape)
        for r in range(reps):
            frac = float(2 * r + 1) / float(2 * reps)
            rhoj_interp[r:-1:reps, ...] = ((1 - frac) * rhoj[:-1, ...]
                                           + frac * rhoj[1:, ...])
        #final record has no following interval, copy it straight across
        rhoj_interp[-1, ...] = rhoj[-1, ...]

        unit_array = (ppm_scale * kg_scale * mwair) / (rhoj_interp * lay_thick)
        day_label = dt.replace_date(unit_key, date)
        unit_dict[day_label] = unit_array
    return unit_dict
def make_forcing():
    """write one forcing file per model day and return AdjointForcingData.

    All forcing is zero except for a block of 1.0 placed at:
    - The second last time-step of the last day
    - Only on the surface layer
    - In the middle of the domain
    - For every species
    """
    shape = ncf.get_variable(template.force, spcs_list[0]).shape
    nstep, nlay, nrow, ncol = shape

    for date in dt.get_datelist():
        force = {}
        for spc in spcs_list:
            force[spc] = np.zeros((nstep, nlay, nrow, ncol))

        is_last_day = (date == dt.get_datelist()[-1])
        if is_last_day:
            #block covers the middle third of the rows and columns
            trow, tcol = int(nrow / 3), int(ncol / 3)
            for arr in force.values():
                arr[-2, 0, trow:2 * trow, tcol:2 * tcol] = 1.

        f_file = dt.replace_date(cmaq.force_file, date)
        ncf.create_from_template(template.force,
                                 f_file,
                                 var_change=force,
                                 date=date,
                                 overwrite=True)
    return d.AdjointForcingData()
def get_variable(self, file_label, varname):
    """
    extension: return an array of a single variable
    input: string, string
    output: numpy.ndarray

    notes: file_label must be a key of self.file_data, the variable is
           read from that entry's 'actual' file.
    """
    known_labels = self.file_data.keys()
    assert file_label in known_labels, (
        'file_label {} not in file_details'.format(file_label))
    actual_file = self.file_data[file_label]['actual']
    return ncf.get_variable(actual_file, varname)
def bwd_no_transport(adjoint_forcing):
    """mimic CMAQ_bwd with no transport.
    assumes ALL files have a 1-hour timestep

    input: adjoint_forcing (not read here, forcing comes from cmaq.force_file)
    output: SensitivityData

    notes: days are processed last-to-first. Within a day the sensitivity at
           a step is the reverse cumulative sum of the forcing after it, plus
           the value carried back from the following day.
    """
    first_spc = spcs_list[0]
    #dimensions of the force, sense & sense_emis templates
    nstep, _, row, col = ncf.get_variable(template.force, first_spc).shape
    s_lay = ncf.get_variable(template.sense_conc, first_spc).shape[1]
    e_lay = ncf.get_variable(template.sense_emis, first_spc).shape[1]
    step_sec = float(tsec)

    #sensitivity carried back across the day boundary, starts as 0.
    carry = {}
    for spc in spcs_list:
        carry[spc] = np.zeros((s_lay, row, col))

    for date in reversed(dt.get_datelist()):
        force_file = dt.replace_date(cmaq.force_file, date)
        force = ncf.get_variable(force_file, spcs_list)
        conc, emis = {}, {}
        for spc in spcs_list:
            bwd_arr = np.zeros((nstep, s_lay, row, col))
            #last record holds what the next day handed back
            bwd_arr[-1] = carry[spc]
            #every other record takes the forcing of the step after it
            bwd_arr[:-1] = force[spc][1:, :s_lay, :, :]
            #accumulate from the end of the day towards its start
            s_arr = np.cumsum(bwd_arr[::-1], axis=0)[::-1]
            carry[spc][:] = s_arr[0].copy()
            conc[spc] = s_arr[:, :s_lay, :, :].copy()
            emis[spc] = s_arr[:, :e_lay, :, :].copy() * step_sec

        #write sensitivity files
        outputs = (
            (template.sense_conc, cmaq.conc_sense_file, conc),
            (template.sense_emis, cmaq.emis_sense_file, emis),
        )
        for tmpl, pattern, data in outputs:
            ncf.create_from_template(tmpl,
                                     dt.replace_date(pattern, date),
                                     var_change=data,
                                     date=date,
                                     overwrite=True)
    return d.SensitivityData()
def prepare_model(physical_data):
    """
    application: change resolution/formatting of physical data for input in forward model
    input: PhysicalData
    output: ModelInputData

    notes: the unit conversion is fetched once and cached in the module
           global unit_convert. cmaq.wipeout_fwd() is called before the
           ModelInputData is created.
    """
    global unit_convert
    if unit_convert is None:
        unit_convert = get_unit_convert()

    model_input_args = {}
    if inc_icon is True:
        #physical icon has no time dim, model input icon has time dim of len 1
        model_input_args['icon'] = {
            spcs: icon_array.reshape((1, ) + icon_array.shape)
            for spcs, icon_array in physical_data.icon.items()
        }

    #all emis files & spcs for model_input use same NSTEP dimension, get its size
    emis_fname = dt.replace_date(template.emis, dt.start_date)
    first_spc = physical_data.spcs[0]
    m_daysize = ncf.get_variable(emis_fname, first_spc).shape[0] - 1
    dlist = dt.get_datelist()
    #physical steps per model day, below 1 if a physical step spans several days
    p_daysize = float(physical_data.nstep) / len(dlist)
    compatible = (p_daysize < 1) or (m_daysize % p_daysize == 0)
    assert compatible, 'physical & model input emis TSTEP incompatible.'

    emis_pattern = 'emis.<YYYYMMDD>'
    for day_index, date in enumerate(dlist):
        start = int(day_index * p_daysize)
        end = int((day_index + 1) * p_daysize)
        if start == end:
            #several days share one physical step
            end += 1
        repeats = m_daysize // (end - start)
        #closing record is the next physical step, or the final step repeated
        if end < physical_data.nstep:
            tail = slice(end, end + 1)
        else:
            tail = slice(end - 1, end)

        spcs_dict = {}
        for spcs_name in physical_data.spcs:
            spc_emis = physical_data.emis[spcs_name]
            mod_data = np.repeat(spc_emis[start:end, ...], repeats, axis=0)
            mod_data = np.append(mod_data, spc_emis[tail, ...], axis=0)
            spcs_dict[spcs_name] = mod_data * unit_convert
        model_input_args[dt.replace_date(emis_pattern, date)] = spcs_dict

    #may want to remove this line in future.
    cmaq.wipeout_fwd()
    return ModelInputData.create_new(**model_input_args)
def finite_diff(scale):
    """compare a sensitivity-based and a forcing-based score for a perturbation.

    input: scale, passed to make_perturbation together with the prior vector
    output: (sense_score, force_score)

    notes: runs partial_adjoint on the prior vector and on the perturbed
           vector, archiving the model output of each run ('init_conc',
           'pert_conc') and the adjoint forcing ('force').
           sense_score = perturbation dotted with the mean of both gradients.
           force_score = (pert conc - init conc) dotted with the archived
           forcing, summed over all dates and species.
           A warning is printed if the two gradients differ by more than a
           relative 1e-6.
    """
    prior_phys = user.get_background()
    unknowns = transform(prior_phys, d.UnknownData)
    init_vector = unknowns.get_vector()

    #archive straight after each run, before the next run replaces the output
    init_gradient = partial_adjoint(init_vector)
    d.ModelOutputData().archive('init_conc')
    pert_vector = make_perturbation(init_vector, scale)
    pert_gradient = partial_adjoint(pert_vector)
    d.ModelOutputData().archive('pert_conc')
    d.AdjointForcingData().archive('force')

    eps = 1e-6
    grad_gap = abs(pert_gradient - init_gradient).sum()
    if grad_gap > eps * abs(pert_gradient).sum():
        #single-argument print(...) gives the same output on python 2 and 3
        print("WARNING: pert & init gradients differ.")
        print("init gradient norm = {:}".format(
            np.linalg.norm(init_gradient)))
        print("pert gradient norm = {:}".format(
            np.linalg.norm(pert_gradient)))

    pert_diff = pert_vector - init_vector
    sense_score = .5 * ((pert_diff * init_gradient).sum() +
                        (pert_diff * pert_gradient).sum())

    force_score = 0.
    iconc_file = os.path.join(archive_path, 'init_conc',
                              archive_defn.conc_file)
    pconc_file = os.path.join(archive_path, 'pert_conc',
                              archive_defn.conc_file)
    force_file = os.path.join(archive_path, 'force', archive_defn.force_file)
    for date in dt.get_datelist():
        iconc = ncf.get_variable(dt.replace_date(iconc_file, date), spcs_list)
        pconc = ncf.get_variable(dt.replace_date(pconc_file, date), spcs_list)
        force = ncf.get_variable(dt.replace_date(force_file, date), spcs_list)
        force_score += sum(((pconc[s] - iconc[s]) * force[s]).sum()
                           for s in spcs_list)
    return sense_score, force_score
def get_kwargs_dict(cls):
    """
    extension: get a dict that will work as kwarg input
    input: None
    output: { file_label : { spcs : np.ndarray(<zeros>) } }

    notes: file labels come from get_filedict( cls.__name__ ), species and
           array shape come from template.force.
    """
    labels = get_filedict(cls.__name__).keys()
    spcs = ncf.get_attr(template.force, 'VAR-LIST').split()
    shape = ncf.get_variable(template.force, spcs[0]).shape
    #every label gets its own freshly allocated zero arrays
    return {
        label: {spc: np.zeros(shape) for spc in spcs}
        for label in labels
    }
def obs_operator(model_output):
    """
    application: simulate set of observations from output of the forward model
    input: ModelOutputData
    output: ObservationData

    notes: each simulated value starts at its ObservationData.offset_term
           entry and accumulates weight * concentration over the entries of
           its weight_grid whose date matches the file being read.
    """
    ObservationData.assert_params()
    simulated = list(ObservationData.offset_term)

    for ymd, obs_indices in ObservationData.ind_by_date.items():
        conc_file = model_output.file_data['conc.' + ymd]['actual']
        conc_by_spc = ncf.get_variable(conc_file, ObservationData.spcs)
        for idx in obs_indices:
            for coord, weight in ObservationData.weight_grid[idx].items():
                #coord = (date, step, lay, row, col, spc)
                if str(coord[0]) != ymd:
                    continue
                step, lay, row, col, spc = coord[1:]
                simulated[idx] += weight * conc_by_spc[spc][step, lay, row, col]
    return ObservationData(simulated)
def convert_unc( unc, val ):
    """
    convert the uncertainty object provided into a valid dictionary
    uncertainty object is either a string (filepath), dictionary (of spcs) or a scalar

    input: unc = filepath string, dict { spc: scalar or array } or scalar
           val = dict { spc: np.ndarray }, all arrays assumed to share the
                 shape of the first one
    output: dict { spc + '_UNC': np.ndarray( shape of val arrays ) }
    raises: AssertionError if file data has the wrong shape, a dict is
            missing a needed spc or any uncertainty value is not > 0.
            Any error from reading the file or from float( unc ) is re-raised.
    """
    #materialise the keys: dict views cannot be indexed on python 3
    spc_list = list( val.keys() )
    arr_shape = val[ spc_list[0] ].shape

    #accept both byte and unicode strings on python 2, plain str on python 3
    string_types = ( str, type( u'' ) )
    if isinstance( unc, string_types ):
        try:
            unc_var = ncf.get_variable( unc, spc_list )
        except Exception:
            print( 'uncertainty file is not valid' )
            raise
        msg = 'unc file has data with wrong shape, needs {:}'.format( str(arr_shape) )
        for spc in spc_list:
            assert unc_var[ spc ].shape == arr_shape, msg
        unc_dict = { s:unc_var[s] for s in spc_list }
    elif isinstance( unc, dict ):
        msg = 'uncertainty dictionary is missing needed spcs.'
        assert set( spc_list ).issubset( unc.keys() ), msg
        #adding to zeros broadcasts a scalar or array up to the full shape
        unc_dict = { s:(np.zeros(arr_shape) + unc[ s ]) for s in spc_list }
    else:
        try:
            scalar = float( unc )
        except (TypeError, ValueError):
            print( 'invalid uncertainty parameter' )
            raise
        unc_dict = { s:(np.zeros(arr_shape) + scalar) for s in spc_list }

    #validate and rename every entry from '<spc>' to '<spc>_UNC'
    for spc in spc_list:
        arr = unc_dict.pop( spc )
        assert (arr > 0).all(), 'uncertainty values must be greater than 0.'
        unc_dict[ spc + '_UNC' ] = arr
    return unc_dict
def fwd_no_transport(model_input):
    """mimic CMAQ_fwd with no transport.
    assumes ALL files have a 1-hour timestep

    input: model_input (not read here, data comes from the cmaq icon,
           emis & met files)
    output: ModelOutputData

    notes: each day's concentration is the initial condition plus the
           running sum of the scaled emissions, the final record of a day
           is the initial condition of the next.
    """
    first_spc = spcs_list[0]
    first_emis = dt.replace_date(template.emis, dt.start_date)
    #get nlays conc
    c_lay = ncf.get_variable(template.conc, first_spc).shape[1]
    #get nlays emis
    e_lay = ncf.get_variable(first_emis, first_spc).shape[1]
    #get icon for each species
    icon = ncf.get_variable(cmaq.icon_file, spcs_list)

    #get constants to convert emission units
    mwair = 28.9628
    ppm_scale = 1E6
    kg_scale = 1E-3
    srcfile = dt.replace_date(cmaq.met_cro_3d, dt.start_date)
    xcell = ncf.get_attr(srcfile, 'XCELL')
    ycell = ncf.get_attr(srcfile, 'YCELL')
    lay_sigma = list(ncf.get_attr(srcfile, 'VGLVLS'))
    lay_thick = np.array(
        [lay_sigma[k] - lay_sigma[k + 1] for k in range(e_lay)])
    lay_thick = lay_thick.reshape((1, e_lay, 1, 1))
    #density (DENSA_J) is divided in per-day inside the loop below
    emis_scale = (ppm_scale * kg_scale * mwair) / (lay_thick * xcell * ycell)

    #run fwd
    for date in dt.get_datelist():
        conc = ncf.get_variable(template.conc, spcs_list)
        emis = ncf.get_variable(dt.replace_date(cmaq.emis_file, date),
                                spcs_list)
        rhoj = ncf.get_variable(dt.replace_date(cmaq.met_cro_3d, date),
                                "DENSA_J")
        #sum of the density at both ends of every timestep
        rhoj_pair = rhoj[:-1, :e_lay, :, :] + rhoj[1:, :e_lay, :, :]
        for spc, c_arr in conc.items():
            #every record starts from the initial condition
            c_arr[:, :, :, :] = icon[spc][:, :c_lay, :, :]
            e_arr = emis_scale * emis[spc][:-1, ...]
            e_arr = 2 * tsec * e_arr / rhoj_pair
            c_arr[1:, :e_lay, :, :] += np.cumsum(e_arr, axis=0)
            #update icon for next day
            icon[spc] = c_arr[-1:, ...]
        #write conc file
        c_file = dt.replace_date(cmaq.conc_file, date)
        ncf.create_from_template(template.conc,
                                 c_file,
                                 var_change=conc,
                                 date=date,
                                 overwrite=True)
    return d.ModelOutputData()
def map_sense(sensitivity):
    """
    application: map adjoint sensitivities to physical grid of unknowns.
    input: SensitivityData
    output: PhysicalAdjointData

    notes: the per-day unit conversion is fetched once and cached in the
           module global unit_convert_dict.
           emission sensitivities are summed down in two stages,
           sensitivity timesteps -> model input timesteps -> physical
           timesteps, the reverse of the repetition done by prepare_model.
    """
    global unit_convert_dict
    global unit_key
    if unit_convert_dict is None:
        unit_convert_dict = get_unit_convert()

    #check that:
    #- date_handle dates exist
    #- PhysicalAdjointData params exist
    #- template.emis & template.sense_emis are compatible
    #- template.icon & template.sense_conc are compatible
    datelist = dt.get_datelist()
    PhysicalAdjointData.assert_params()
    pad = PhysicalAdjointData
    #all spcs use same dimension set, therefore only need to test 1.
    test_fname = dt.replace_date(template.emis, dt.start_date)
    mod_shape = ncf.get_variable(test_fname, pad.spcs[0]).shape

    #phys_params = ['tsec','nstep','nlays_icon','nlays_emis','nrows','ncols','spcs']
    #icon_dict = { spcs: np.ndarray( nlays_icon, nrows, ncols ) }
    #emis_dict = { spcs: np.ndarray( nstep, nlays_emis, nrows, ncols ) }

    #create blank constructors for PhysicalAdjointData
    if inc_icon is True:
        icon_shape = (pad.nlays_icon, pad.nrows, pad.ncols)
        icon_dict = {spc: np.zeros(icon_shape) for spc in pad.spcs}
    emis_shape = (pad.nstep, pad.nlays_emis, pad.nrows, pad.ncols)
    emis_dict = {spc: np.zeros(emis_shape) for spc in pad.spcs}

    #construct icon_dict
    if inc_icon is True:
        icon_label = dt.replace_date('conc.<YYYYMMDD>', datelist[0])
        icon_fname = sensitivity.file_data[icon_label]['actual']
        icon_vars = ncf.get_variable(icon_fname, icon_dict.keys())
        msg = 'conc_sense and PhysicalAdjointData.{} are incompatible'
        for spc in pad.spcs:
            #first record of the first day
            data = icon_vars[spc][0, :, :, :]
            ilays, irows, icols = data.shape
            assert ilays >= pad.nlays_icon, msg.format('nlays_icon')
            assert irows == pad.nrows, msg.format('nrows')
            assert icols == pad.ncols, msg.format('ncols')
            icon_dict[spc] = data[0:pad.nlays_icon, :, :].copy()

    #physical steps per day, below 1 if a physical step spans several days
    p_daysize = float(24 * 60 * 60) / pad.tsec
    emis_pattern = 'emis.<YYYYMMDD>'
    sense_msg = 'emis_sense and ModelInputData {} are incompatible.'
    phys_msg = 'ModelInputData and PhysicalAdjointData.{} are incompatible.'
    for i, date in enumerate(datelist):
        label = dt.replace_date(emis_pattern, date)
        sense_fname = sensitivity.file_data[label]['actual']
        sense_data_dict = ncf.get_variable(sense_fname, pad.spcs)
        start = int(i * p_daysize)
        end = int((i + 1) * p_daysize)
        if start == end:
            #several days share one physical step
            end += 1
        for spc in pad.spcs:
            day_units = unit_convert_dict[dt.replace_date(unit_key, date)]
            sdata = sense_data_dict[spc][:] * day_units
            sstep, slay, srow, scol = sdata.shape

            #recast to match mod_shape
            mstep, mlay, mrow, mcol = mod_shape
            steps_fit = ((sstep - 1) >= (mstep - 1)
                         and (sstep - 1) % (mstep - 1) == 0)
            assert steps_fit, sense_msg.format('TSTEP')
            assert slay >= mlay, sense_msg.format('NLAYS')
            assert srow == mrow, sense_msg.format('NROWS')
            assert scol == mcol, sense_msg.format('NCOLS')
            #drop the closing record, then sum each group of sub-steps
            sense_arr = sdata[:-1, :mlay, :, :]
            grouped = sense_arr.reshape((mstep - 1, -1, mlay, mrow, mcol))
            model_arr = grouped.sum(axis=1)

            #adjoint prepare_model
            pstep = end - start
            play = pad.nlays_emis
            prow = pad.nrows
            pcol = pad.ncols
            steps_fit = (mstep - 1) >= pstep and (mstep - 1) % pstep == 0
            assert steps_fit, phys_msg.format('nstep')
            assert mlay >= play, phys_msg.format('nlays_emis')
            assert mrow == prow, phys_msg.format('nrows')
            assert mcol == pcol, phys_msg.format('ncols')
            model_arr = model_arr[:, :play, :, :]
            grouped = model_arr.reshape((pstep, -1, play, prow, pcol))
            phys_arr = grouped.sum(axis=1)
            emis_dict[spc][start:end, ...] += phys_arr.copy()

    if inc_icon is False:
        icon_dict = None
    return PhysicalAdjointData(icon_dict, emis_dict)
def from_file(cls, filename):
    """
    extension: create a PhysicalData instance from a file
    input: user-defined
    output: PhysicalData

    eg: prior_phys = datadef.PhysicalData.from_file( "saved_prior.data" )

    notes: reads the SDATE, EDATE, TSTEP & VAR-LIST attributes and the
           'emis' group (plus the 'icon' group if inc_icon is True),
           each species having a matching '<spc>_UNC' uncertainty variable.
           The parameters found are written onto PhysicalAbstractData.
    raises: AssertionError if the dates, shapes, timestep or uncertainty
            values are invalid, or if a non-mutable parameter that is
            already set would change.
    """
    daysec = 24 * 60 * 60

    def unc(spc):
        """name of the uncertainty variable paired with spc."""
        return spc + '_UNC'

    #get all data/parameters from file
    sdate = str(ncf.get_attr(filename, 'SDATE'))
    edate = str(ncf.get_attr(filename, 'EDATE'))
    tstep = ncf.get_attr(filename, 'TSTEP')
    #TSTEP is read as ( days, HHMMSS ), convert to seconds
    day, step = int(tstep[0]), int(tstep[1])
    tsec = (daysec * day + 3600 * (step // 10000)
            + 60 * ((step // 100) % 100) + step % 100)
    spcs_list = ncf.get_attr(filename, 'VAR-LIST').split()
    unc_list = [unc(spc) for spc in spcs_list]

    if inc_icon is True:
        icon_dict = ncf.get_variable(filename, spcs_list, group='icon')
        icon_unc = ncf.get_variable(filename, unc_list, group='icon')
    emis_dict = ncf.get_variable(filename, spcs_list, group='emis')
    emis_unc = ncf.get_variable(filename, unc_list, group='emis')
    #re-key the uncertainty dicts from '<spc>_UNC' to '<spc>'
    for spc in spcs_list:
        if inc_icon is True:
            icon_unc[spc] = icon_unc.pop(unc(spc))
        emis_unc[spc] = emis_unc.pop(unc(spc))

    #ensure parameters from file are valid
    msg = 'invalid start date'
    assert sdate == dt.replace_date('<YYYYDDD>', dt.start_date), msg
    msg = 'invalid end date'
    assert edate == dt.replace_date('<YYYYDDD>', dt.end_date), msg

    emis_shape = [e.shape for e in emis_dict.values()]
    for eshape in emis_shape[1:]:
        assert eshape == emis_shape[
            0], 'all emis spcs must have the same shape.'
    estep, elays, erows, ecols = emis_shape[0]

    if inc_icon is True:
        icon_shape = [i.shape for i in icon_dict.values()]
        for ishape in icon_shape[1:]:
            assert ishape == icon_shape[
                0], 'all icon spcs must have the same shape.'
        ilays, irows, icols = icon_shape[0]
        assert irows == erows, 'icon & emis must match rows.'
        assert icols == ecols, 'icon & emis must match columns.'

    assert max(daysec, tsec) % min(
        daysec, tsec
    ) == 0, 'tsec must be a factor or multiple of No. seconds in a day.'
    assert (tsec >= daysec) or (
        estep % (daysec // tsec) == 0), 'nstep must cleanly divide into days.'

    msg = 'Uncertainty values are invalid for this data.'
    for spc in spcs_list:
        if inc_icon is True:
            assert icon_unc[spc].shape == icon_dict[spc].shape, msg
            assert (icon_unc[spc] > 0).all(), msg
        assert emis_unc[spc].shape == emis_dict[spc].shape, msg
        assert (emis_unc[spc] > 0).all(), msg

    #assign new param values.
    par_name = [
        'tsec', 'nstep', 'nlays_emis', 'nrows', 'ncols', 'spcs', 'emis_unc'
    ]
    par_val = [tsec, estep, elays, erows, ecols, spcs_list, emis_unc]
    par_mutable = ['emis_unc']
    if inc_icon is True:
        par_name += ['nlays_icon', 'icon_unc']
        par_val += [ilays, icon_unc]
        par_mutable += ['icon_unc']

    for name, val in zip(par_name, par_val):
        old_val = getattr(cls, name)
        if old_val is not None:
            #param already defined, ensure no clash.
            if name in par_mutable:
                #parameter is mutable, affect applied globally
                msg = 'Any change to PhysicalAbstractData.{} is applied globally!'.format(
                    name)
                #Logger.warn is a deprecated alias, use warning
                #NOTE(review): assumes logger is a stdlib logging.Logger - confirm
                logger.warning(msg)
            else:
                msg = 'cannot change PhysicalAbstractData.{}'.format(name)
                assert np.array_equal(old_val, val), msg
        #set this abstract classes attribute, not calling child!
        setattr(PhysicalAbstractData, name, val)

    if inc_icon is False:
        icon_dict = None
    return cls(icon_dict, emis_dict)
# PhysicalData tstep must fit into model days assert max(tsec, daysec) % min(tsec, daysec) == 0, 'tstep must fit into days' assert len( dt.get_datelist()) * daysec % tsec == 0, 'tstep must fit into model length' # convert emis-file data into needed PhysicalData format nrow = int(ncf.get_attr(efile, 'NROWS')) ncol = int(ncf.get_attr(efile, 'NCOLS')) xcell = float(ncf.get_attr(efile, 'XCELL')) ycell = float(ncf.get_attr(efile, 'YCELL')) emis_dict = {spc: [] for spc in spc_list} cell_area = xcell * ycell for date in dt.get_datelist(): efile = dt.replace_date(cmaq_config.emis_file, date) edict = ncf.get_variable(efile, spc_list) for spc in spc_list: #get data and convert unit (mol/(s*cell) to mol/(s*m**2) data = edict[spc][:-1, :emis_nlay, :, :] / cell_area emis_dict[spc].append(data) tot_nstep = len(dt.get_datelist()) * daysec // tsec for spc in spc_list: data = emis_dict[spc] data = np.concatenate(data, axis=0) data = data.reshape(( tot_nstep, -1, emis_nlay, nrow, ncol,
cmaq_config.avg_conc_spcs = ' '.join(conc_spcs) if str(cmaq_config.force_lays).lower() == 'template': force_lay = int(ncf.get_attr(icon_file, 'NLAYS')) cmaq_config.force_lays = str(force_lay) if str(cmaq_config.sense_emis_lays).lower() == 'template': sense_lay = int(ncf.get_attr(icon_file, 'NLAYS')) cmaq_config.sense_emis_lays = str(sense_lay) # generate sample files by running 1 day of cmaq (fwd & bwd) cmaq_handle.wipeout_fwd() cmaq_handle.run_fwd_single(dt.start_date, is_first=True) # make force file with same attr as conc and all data zeroed conc_spcs = ncf.get_attr(conc_file, 'VAR-LIST').split() conc_data = ncf.get_variable(conc_file, conc_spcs) force_data = {k: np.zeros(v.shape) for k, v in conc_data.items()} ncf.create_from_template(conc_file, force_file, force_data) cmaq_handle.run_bwd_single(dt.start_date, is_first=True) # create record for icon & emis files fh.ensure_path(os.path.dirname(template.icon)) ncf.copy_compress(icon_file, template.icon) for date in dt.get_datelist(): emis_src = dt.replace_date(cmaq_config.emis_file, date) emis_dst = dt.replace_date(template.emis, date) fh.ensure_path(os.path.dirname(emis_dst)) ncf.copy_compress(emis_src, emis_dst) # create template for conc, force & sense files fh.ensure_path(os.path.dirname(template.conc))