def make_forcing():
    """put a block of forcing at:
    - The second last time-step of the last day
    - Only on the surface layer
    - In the middle of the domain
    - For every species
    """
    nstep, nlay, nrow, ncol = ncf.get_variable(template.force, spcs_list[0]).shape
    for date in dt.get_datelist():
        force = {spc: np.zeros((nstep, nlay, nrow, ncol,)) for spc in spcs_list}
        if date == dt.get_datelist()[-1]:
            for arr in force.values():
                trow, tcol = int(nrow / 3), int(ncol / 3)
                arr[-2, 0, trow:2 * trow, tcol:2 * tcol] = 1.
        f_file = dt.replace_date(cmaq.force_file, date)
        ncf.create_from_template(template.force, f_file, var_change=force,
                                 date=date, overwrite=True)
    return d.AdjointForcingData()
def get_unit_convert():
    """
    extension: get unit conversion dictionary for sensitivity to each day's emissions
    input: None
    output: dict ('units.<YYYYMMDD>': np.ndarray( shape_of( template.sense_emis ) ))

    notes: SensitivityData.emis units = CF/(ppm/s)
           PhysicalAdjointData.emis units = CF/(mol/(s*m^2))
    """
    global unit_key

    #physical constants:
    #molar weight of dry air (precision matches cmaq)
    mwair = 28.9628
    #convert proportion to ppm
    ppm_scale = 1E6
    #convert g to kg
    kg_scale = 1E-3

    unit_dict = {}
    #all spcs have same shape, get from 1st
    tmp_spc = ncf.get_attr(template.sense_emis, 'VAR-LIST').split()[0]
    target_shape = ncf.get_variable(template.sense_emis, tmp_spc)[:].shape
    #layer thickness constant between files
    lay_sigma = list(ncf.get_attr(template.sense_emis, 'VGLVLS'))
    #layer thickness measured in scaled pressure units
    lay_thick = [lay_sigma[i] - lay_sigma[i + 1] for i in range(len(lay_sigma) - 1)]
    nlay = len(lay_thick)
    lay_thick = np.array(lay_thick).reshape((1, nlay, 1, 1))

    for date in dt.get_datelist():
        met_file = dt.replace_date(cmaq_config.met_cro_3d, date)
        #slice off any extra layers above area of interest
        rhoj = ncf.get_variable(met_file, 'DENSA_J')[:, :nlay, ...]
        #assert timesteps are compatible
        assert (target_shape[0] - 1) >= (rhoj.shape[0] - 1), 'incompatible timesteps'
        assert (target_shape[0] - 1) % (rhoj.shape[0] - 1) == 0, 'incompatible timesteps'
        reps = (target_shape[0] - 1) // (rhoj.shape[0] - 1)

        rhoj_interp = np.zeros(target_shape)
        for r in range(reps):
            frac = float(2 * r + 1) / float(2 * reps)
            rhoj_interp[r:-1:reps, ...] = (1 - frac) * rhoj[:-1, ...] + frac * rhoj[1:, ...]
        rhoj_interp[-1, ...] = rhoj[-1, ...]
        unit_array = (ppm_scale * kg_scale * mwair) / (rhoj_interp * lay_thick)

        day_label = dt.replace_date(unit_key, date)
        unit_dict[day_label] = unit_array
    return unit_dict
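# Illustrative sketch (not part of the module): what the conversion factor built by
# get_unit_convert looks like for a single cell. The DENSA_J and sigma-thickness
# values are hypothetical, used purely to show the arithmetic; only the constants
# (mwair, ppm_scale, kg_scale) come from the function above.
def _demo_unit_convert_cell():
    mwair = 28.9628        # g/mol, molar weight of dry air
    ppm_scale = 1E6        # proportion -> ppm
    kg_scale = 1E-3        # g -> kg
    rhoj_cell = 950.0      # hypothetical DENSA_J value for one cell
    lay_thick_cell = 0.05  # hypothetical sigma-layer thickness
    # multiplying a CF/(ppm/s) sensitivity by this factor gives CF/(mol/(s*m^2));
    # unit_dict[dt.replace_date(unit_key, date)] holds this factor for every cell
    # and time-step of that day.
    factor = (ppm_scale * kg_scale * mwair) / (rhoj_cell * lay_thick_cell)
    return factor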
def run_bwd():
    """
    extension: run cmaq bwd from current config
    input: None
    output: None
    """
    isfirst = True
    for cur_date in dt.get_datelist()[::-1]:
        run_bwd_single(cur_date, isfirst)
        isfirst = False
    clear_local_logs()
    return None
def prepare_model(physical_data):
    """
    application: change resolution/formatting of physical data for input in forward model
    input: PhysicalData
    output: ModelInputData
    """
    global unit_convert
    if unit_convert is None:
        unit_convert = get_unit_convert()

    if inc_icon is True:
        model_input_args = {'icon': {}}
        #physical icon has no time dim, model input icon has time dim of len 1
        for spcs, icon_array in physical_data.icon.items():
            model_input_args['icon'][spcs] = icon_array.reshape((1,) + icon_array.shape)
    else:
        model_input_args = {}

    #all emis files & spcs for model_input use the same NSTEP dimension, get its size
    emis_fname = dt.replace_date(template.emis, dt.start_date)
    m_daysize = ncf.get_variable(emis_fname, physical_data.spcs[0]).shape[0] - 1
    dlist = dt.get_datelist()
    p_daysize = float(physical_data.nstep) / len(dlist)
    assert (p_daysize < 1) or (m_daysize % p_daysize == 0), \
        'physical & model input emis TSTEP incompatible.'

    emis_pattern = 'emis.<YYYYMMDD>'
    for i, date in enumerate(dlist):
        spcs_dict = {}
        start = int(i * p_daysize)
        end = int((i + 1) * p_daysize)
        if start == end:
            end += 1
        for spcs_name in physical_data.spcs:
            phys_data = physical_data.emis[spcs_name][start:end, ...]
            if end < physical_data.nstep:
                last_slice = physical_data.emis[spcs_name][end:end + 1, ...]
            else:
                last_slice = physical_data.emis[spcs_name][end - 1:end, ...]
            mod_data = np.repeat(phys_data, m_daysize // (end - start), axis=0)
            mod_data = np.append(mod_data, last_slice, axis=0)
            spcs_dict[spcs_name] = mod_data * unit_convert
        emis_argname = dt.replace_date(emis_pattern, date)
        model_input_args[emis_argname] = spcs_dict

    #may want to remove this line in future.
    cmaq.wipeout_fwd()

    return ModelInputData.create_new(**model_input_args)
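# Illustrative sketch (not part of the module) of the time-step expansion used in
# prepare_model: each coarse physical step is repeated up to the model's finer
# TSTEP, then one extra slice is appended so the emis file holds m_daysize + 1
# records per day. In prepare_model the appended slice is the next day's first
# physical step when one exists; this sketch reuses the last slice. Sizes made up.
def _demo_expand_phys_to_model():
    import numpy as np
    p_daysize = 4    # hypothetical physical steps per day
    m_daysize = 24   # hypothetical model steps per day (e.g. hourly emis file)
    phys_day = np.arange(p_daysize, dtype=float).reshape((p_daysize, 1, 1, 1))
    mod_day = np.repeat(phys_day, m_daysize // p_daysize, axis=0)
    mod_day = np.append(mod_day, phys_day[-1:, ...], axis=0)
    assert mod_day.shape[0] == m_daysize + 1
    return mod_day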
def fwd_no_transport(model_input):
    """mimic CMAQ_fwd with no transport. assumes ALL files have a 1-hour timestep"""
    #get nlays conc
    c_lay = ncf.get_variable(template.conc, spcs_list[0]).shape[1]
    #get nlays emis
    e_lay = ncf.get_variable(dt.replace_date(template.emis, dt.start_date),
                             spcs_list[0]).shape[1]
    #get icon for each species
    icon = ncf.get_variable(cmaq.icon_file, spcs_list)

    #get constants to convert emission units
    mwair = 28.9628
    ppm_scale = 1E6
    kg_scale = 1E-3
    srcfile = dt.replace_date(cmaq.met_cro_3d, dt.start_date)
    xcell = ncf.get_attr(srcfile, 'XCELL')
    ycell = ncf.get_attr(srcfile, 'YCELL')
    lay_sigma = list(ncf.get_attr(srcfile, 'VGLVLS'))
    lay_thick = [lay_sigma[i] - lay_sigma[i + 1] for i in range(e_lay)]
    lay_thick = np.array(lay_thick).reshape((1, e_lay, 1, 1))
    emis_scale = (ppm_scale * kg_scale * mwair) / (lay_thick * xcell * ycell)  # * RRHOJ

    #run fwd
    for date in dt.get_datelist():
        conc = ncf.get_variable(template.conc, spcs_list)
        emis = ncf.get_variable(dt.replace_date(cmaq.emis_file, date), spcs_list)
        rhoj = ncf.get_variable(dt.replace_date(cmaq.met_cro_3d, date), "DENSA_J")
        for spc, c_arr in conc.items():
            c_arr[:, :, :, :] = icon[spc][:, :c_lay, :, :]
            e_arr = emis_scale * emis[spc][:-1, ...]
            e_arr = 2 * tsec * e_arr / (rhoj[:-1, :e_lay, :, :] + rhoj[1:, :e_lay, :, :])
            c_arr[1:, :e_lay, :, :] += np.cumsum(e_arr, axis=0)
            #update icon for next day
            icon[spc] = c_arr[-1:, ...]
        #write conc file
        c_file = dt.replace_date(cmaq.conc_file, date)
        ncf.create_from_template(template.conc, c_file, var_change=conc,
                                 date=date, overwrite=True)
    return d.ModelOutputData()
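# Illustrative check (not part of the module): with transport switched off the
# concentration at each output step is the initial condition plus the running sum
# of the emission tendency, which is what the np.cumsum over the time axis in
# fwd_no_transport computes. All numbers below are hypothetical.
def _demo_no_transport_integration():
    import numpy as np
    tsec = 3600.0                            # assumed 1-hour timestep, in seconds
    tendency = np.full((24, 1, 1, 1), 2.0)   # hypothetical constant tendency (ppm/s)
    conc = np.zeros((25, 1, 1, 1))
    conc[:] = 10.0                           # hypothetical initial condition (ppm)
    conc[1:, ...] += np.cumsum(tsec * tendency, axis=0)
    assert np.isclose(conc[-1, 0, 0, 0], 10.0 + 24 * 3600.0 * 2.0)
    return conc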
def bwd_no_transport(adjoint_forcing):
    """mimic CMAQ_bwd with no transport. assumes ALL files have a 1-hour timestep"""
    #get nlays for force, sense & sense_emis
    f_lay = ncf.get_variable(template.force, spcs_list[0]).shape[1]
    s_lay = ncf.get_variable(template.sense_conc, spcs_list[0]).shape[1]
    e_lay = ncf.get_variable(template.sense_emis, spcs_list[0]).shape[1]
    #get icon for each species, init as 0.
    nstep, _, row, col = ncf.get_variable(template.force, spcs_list[0]).shape
    icon = {spc: np.zeros((s_lay, row, col,)) for spc in spcs_list}

    for date in dt.get_datelist()[::-1]:
        force = ncf.get_variable(dt.replace_date(cmaq.force_file, date), spcs_list)
        conc = {}
        emis = {}
        for spc in spcs_list:
            bwd_arr = np.zeros((nstep, s_lay, row, col))
            bwd_arr[-1, :, :, :] = icon[spc][:, :, :]
            bwd_arr[:-1, :, :, :] = force[spc][1:, :s_lay, :, :]
            s_arr = np.cumsum(bwd_arr[::-1, :, :, :], axis=0)[::-1, :, :, :]
            icon[spc][:] = s_arr[0, :, :, :].copy()
            conc[spc] = s_arr[:, :s_lay, :, :].copy()
            emis[spc] = s_arr[:, :e_lay, :, :].copy() * float(tsec)
        #write sensitivity files
        c_file = dt.replace_date(cmaq.conc_sense_file, date)
        e_file = dt.replace_date(cmaq.emis_sense_file, date)
        ncf.create_from_template(template.sense_conc, c_file, var_change=conc,
                                 date=date, overwrite=True)
        ncf.create_from_template(template.sense_emis, e_file, var_change=emis,
                                 date=date, overwrite=True)
    return d.SensitivityData()
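# Illustrative check (not part of the module): the reversed cumulative sum used in
# bwd_no_transport is the adjoint of the forward model's np.cumsum, i.e. for any
# emission tendency e and forcing f, <cumsum(e), f> == <e, reversed_cumsum(f)>.
def _demo_cumsum_adjoint():
    import numpy as np
    np.random.seed(0)
    e = np.random.randn(24)
    f = np.random.randn(24)
    fwd = np.cumsum(e)
    bwd = np.cumsum(f[::-1])[::-1]
    assert np.isclose((fwd * f).sum(), (e * bwd).sum())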
def finite_diff(scale):
    prior_phys = user.get_background()
    unknowns = transform(prior_phys, d.UnknownData)
    init_vector = unknowns.get_vector()

    init_gradient = partial_adjoint(init_vector)
    d.ModelOutputData().archive('init_conc')

    pert_vector = make_perturbation(init_vector, scale)
    pert_gradient = partial_adjoint(pert_vector)
    d.ModelOutputData().archive('pert_conc')
    d.AdjointForcingData().archive('force')

    eps = 1e-6
    if abs(pert_gradient - init_gradient).sum() > eps * abs(pert_gradient).sum():
        print "WARNING: pert & init gradients differ."
        print "init gradient norm = {:}".format(np.linalg.norm(init_gradient))
        print "pert gradient norm = {:}".format(np.linalg.norm(pert_gradient))

    pert_diff = pert_vector - init_vector
    sense_score = .5 * ((pert_diff * init_gradient).sum() +
                        (pert_diff * pert_gradient).sum())

    force_score = 0.
    iconc_file = os.path.join(archive_path, 'init_conc', archive_defn.conc_file)
    pconc_file = os.path.join(archive_path, 'pert_conc', archive_defn.conc_file)
    force_file = os.path.join(archive_path, 'force', archive_defn.force_file)
    for date in dt.get_datelist():
        iconc = ncf.get_variable(dt.replace_date(iconc_file, date), spcs_list)
        pconc = ncf.get_variable(dt.replace_date(pconc_file, date), spcs_list)
        force = ncf.get_variable(dt.replace_date(force_file, date), spcs_list)
        c_diff = {s: pconc[s] - iconc[s] for s in spcs_list}
        force_score += sum([(c_diff[s] * force[s]).sum() for s in spcs_list])

    return sense_score, force_score
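# Illustrative usage sketch (hypothetical driver, not part of the module): for a
# consistent forward/adjoint pair the two scores returned by finite_diff -- the
# gradient projected onto the perturbation and the <conc difference, forcing>
# inner product -- should converge toward each other as the scale shrinks.
def _demo_finite_diff_scan():
    for scale in (1e-1, 1e-2, 1e-3):
        sense_score, force_score = finite_diff(scale)
        print 'scale={:}: sense={:}, force={:}, ratio={:}'.format(
            scale, sense_score, force_score, force_score / sense_score)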
def from_file(cls, filename):
    """
    extension: create an ObservationData from a file
    input: user-defined
    output: ObservationData

    eg: observed = datadef.ObservationData.from_file( "saved_obs.data" )
    """
    datalist = fh.load_list(filename)

    domain = datalist[0]
    sdate = domain.pop('SDATE')
    edate = domain.pop('EDATE')
    if 'is_lite' in domain.keys():
        is_lite = domain.pop('is_lite')
    else:
        is_lite = False

    if cls.grid_attr is not None:
        logger.warn('Overwriting ObservationData.grid_attr')
    cls.grid_attr = domain
    cls.check_grid()
    msg = 'obs data does not match params date'
    assert sdate == np.int32(dt.replace_date('<YYYYMMDD>', dt.start_date)), msg
    assert edate == np.int32(dt.replace_date('<YYYYMMDD>', dt.end_date)), msg

    obs_list = datalist[1:]
    unc = [odict.pop('uncertainty') for odict in obs_list]
    val = [odict.pop('value') for odict in obs_list]
    off = [odict.pop('offset_term') for odict in obs_list]
    if is_lite is False:
        weight = [odict.pop('weight_grid') for odict in obs_list]
    #create default 'lite_coord' if not available
    coord = [odict.pop('lite_coord', None) for odict in obs_list]
    if None in coord:
        assert is_lite is False, 'Missing coordinate data.'
        logger.warn("Missing lite_coord data. "
                    "Setting to coord with largest weight in weight_grid")
        for i, _ in enumerate(obs_list):
            if coord[i] is None:
                max_weight = max([(v, k,) for k, v in weight[i].items()])
                coord[i] = max_weight[1]

    if cls.length is not None:
        logger.warn('Overwriting ObservationData.length')
    cls.length = len(obs_list)
    if cls.uncertainty is not None:
        logger.warn('Overwriting ObservationData.uncertainty')
    cls.uncertainty = unc
    if cls.offset_term is not None:
        logger.warn('Overwriting ObservationData.offset_term')
    cls.offset_term = off
    if cls.lite_coord is not None:
        logger.warn('Overwriting ObservationData.lite_coord')
    cls.lite_coord = coord
    if cls.misc_meta is not None:
        logger.warn('Overwriting ObservationData.misc_meta')
    cls.misc_meta = obs_list
    if is_lite is False:
        if cls.weight_grid is not None:
            logger.warn('Overwriting ObservationData.weight_grid')
        cls.weight_grid = weight

    if is_lite is True:
        all_spcs = set(str(coord[-1]) for coord in cls.lite_coord)
    else:
        all_spcs = set()
        for w in cls.weight_grid:
            spcs = set(str(coord[-1]) for coord in w.keys())
            all_spcs = all_spcs.union(spcs)
    if cls.spcs is not None:
        logger.warn('Overwriting ObservationData.spcs')
    cls.spcs = sorted(list(all_spcs))

    if is_lite is False:
        dlist = [dt.replace_date('<YYYYMMDD>', d) for d in dt.get_datelist()]
        ind_by_date = {d: [] for d in dlist}
        for i, weight in enumerate(cls.weight_grid):
            dates = set(str(coord[0]) for coord in weight.keys())
            for d in dates:
                ind_by_date[d].append(i)
        if cls.ind_by_date is not None:
            logger.warn('Overwriting ObservationData.ind_by_date')
        cls.ind_by_date = ind_by_date

    return cls(val, is_lite=is_lite)
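# Illustrative sketch (not part of the module) of the list layout from_file expects:
# element 0 is the domain/grid record and every later element is one observation.
# The exact coordinate tuple layout is an assumption; from_file itself only relies
# on coord[0] being the date string and coord[-1] being the species name. All
# values below are hypothetical, and a real grid record carries the full set of
# grid attributes rather than the two shown here.
_example_datalist = [
    {'SDATE': 20180701, 'EDATE': 20180701, 'is_lite': False,
     'NROWS': 10, 'NCOLS': 10},   # every remaining key becomes grid_attr
    {'value': 1.85, 'uncertainty': 0.05, 'offset_term': 0.0,
     'weight_grid': {('20180701', 0, 0, 4, 5, 'CH4'): 1.0},
     'lite_coord': ('20180701', 0, 0, 4, 5, 'CH4'),
     'source': 'hypothetical-site'},   # leftover keys become misc_meta
]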
def map_sense(sensitivity):
    """
    application: map adjoint sensitivities to physical grid of unknowns.
    input: SensitivityData
    output: PhysicalAdjointData
    """
    global unit_convert_dict
    global unit_key
    if unit_convert_dict is None:
        unit_convert_dict = get_unit_convert()

    #check that:
    #- date_handle dates exist
    #- PhysicalAdjointData params exist
    #- template.emis & template.sense_emis are compatible
    #- template.icon & template.sense_conc are compatible
    datelist = dt.get_datelist()
    PhysicalAdjointData.assert_params()
    #all spcs use the same dimension set, therefore only need to test 1.
    test_spc = PhysicalAdjointData.spcs[0]
    test_fname = dt.replace_date(template.emis, dt.start_date)
    mod_shape = ncf.get_variable(test_fname, test_spc).shape

    #phys_params = ['tsec','nstep','nlays_icon','nlays_emis','nrows','ncols','spcs']
    #icon_dict = { spcs: np.ndarray( nlays_icon, nrows, ncols ) }
    #emis_dict = { spcs: np.ndarray( nstep, nlays_emis, nrows, ncols ) }

    #create blank constructors for PhysicalAdjointData
    p = PhysicalAdjointData
    if inc_icon is True:
        icon_shape = (p.nlays_icon, p.nrows, p.ncols,)
        icon_dict = {spc: np.zeros(icon_shape) for spc in p.spcs}
    emis_shape = (p.nstep, p.nlays_emis, p.nrows, p.ncols,)
    emis_dict = {spc: np.zeros(emis_shape) for spc in p.spcs}
    del p

    #construct icon_dict
    if inc_icon is True:
        icon_label = dt.replace_date('conc.<YYYYMMDD>', datelist[0])
        icon_fname = sensitivity.file_data[icon_label]['actual']
        icon_vars = ncf.get_variable(icon_fname, icon_dict.keys())
        for spc in PhysicalAdjointData.spcs:
            data = icon_vars[spc][0, :, :, :]
            ilays, irows, icols = data.shape
            msg = 'conc_sense and PhysicalAdjointData.{} are incompatible'
            assert ilays >= PhysicalAdjointData.nlays_icon, msg.format('nlays_icon')
            assert irows == PhysicalAdjointData.nrows, msg.format('nrows')
            assert icols == PhysicalAdjointData.ncols, msg.format('ncols')
            icon_dict[spc] = data[0:PhysicalAdjointData.nlays_icon, :, :].copy()

    p_daysize = float(24 * 60 * 60) / PhysicalAdjointData.tsec
    emis_pattern = 'emis.<YYYYMMDD>'
    for i, date in enumerate(datelist):
        label = dt.replace_date(emis_pattern, date)
        sense_fname = sensitivity.file_data[label]['actual']
        sense_data_dict = ncf.get_variable(sense_fname, PhysicalAdjointData.spcs)
        start = int(i * p_daysize)
        end = int((i + 1) * p_daysize)
        if start == end:
            end += 1
        for spc in PhysicalAdjointData.spcs:
            unit_convert = unit_convert_dict[dt.replace_date(unit_key, date)]
            sdata = sense_data_dict[spc][:] * unit_convert
            sstep, slay, srow, scol = sdata.shape
            #recast to match mod_shape
            mstep, mlay, mrow, mcol = mod_shape
            msg = 'emis_sense and ModelInputData {} are incompatible.'
            assert ((sstep - 1) >= (mstep - 1)) and ((sstep - 1) % (mstep - 1) == 0), \
                msg.format('TSTEP')
            assert slay >= mlay, msg.format('NLAYS')
            assert srow == mrow, msg.format('NROWS')
            assert scol == mcol, msg.format('NCOLS')
            sense_arr = sdata[:-1, :mlay, :, :]
            model_arr = sense_arr.reshape((mstep - 1, -1, mlay, mrow, mcol)).sum(axis=1)
            #adjoint of prepare_model: aggregate model time-steps back onto physical steps
            pstep = end - start
            play = PhysicalAdjointData.nlays_emis
            prow = PhysicalAdjointData.nrows
            pcol = PhysicalAdjointData.ncols
            msg = 'ModelInputData and PhysicalAdjointData.{} are incompatible.'
            assert ((mstep - 1) >= pstep) and ((mstep - 1) % pstep == 0), \
                msg.format('nstep')
            assert mlay >= play, msg.format('nlays_emis')
            assert mrow == prow, msg.format('nrows')
            assert mcol == pcol, msg.format('ncols')
            model_arr = model_arr[:, :play, :, :]
            phys_arr = model_arr.reshape((pstep, -1, play, prow, pcol)).sum(axis=1)
            emis_dict[spc][start:end, ...] += phys_arr.copy()

    if inc_icon is False:
        icon_dict = None
    return PhysicalAdjointData(icon_dict, emis_dict)
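# Illustrative check (not part of the module): the reshape-and-sum over the time
# axis in map_sense is the adjoint of the np.repeat expansion in prepare_model,
# i.e. <repeat(x), y> == <x, reshape_sum(y)> for any x and y. Sizes are made up.
def _demo_repeat_adjoint():
    import numpy as np
    np.random.seed(0)
    pstep, reps = 4, 6                     # hypothetical coarse steps and repeats
    x = np.random.randn(pstep)
    y = np.random.randn(pstep * reps)
    fwd = np.repeat(x, reps)
    adj = y.reshape((pstep, reps)).sum(axis=1)
    assert np.isclose((fwd * y).sum(), (x * adj).sum())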
def build_filedict():
    """
    extension: construct the dictionary of files for the required dates
    input: None
    output: None

    notes: should only be called once, after date_handle has defined dates.
    """
    global all_files
    model_input_files = {}
    model_output_files = {}
    adjoint_forcing_files = {}
    sensitivity_files = {}
    all_files['ModelInputData'] = model_input_files
    all_files['ModelOutputData'] = model_output_files
    all_files['AdjointForcingData'] = adjoint_forcing_files
    all_files['SensitivityData'] = sensitivity_files

    if input_defn.inc_icon is True:
        model_input_files['icon'] = {
            'actual': cmaq_config.icon_file,
            'template': template.icon,
            'archive': archive.icon_file,
            'date': None
            #'date': dt.start_date
        }

    for date in dt.get_datelist():
        ymd = dt.replace_date('<YYYYMMDD>', date)
        model_input_files['emis.' + ymd] = {
            'actual': dt.replace_date(cmaq_config.emis_file, date),
            'template': dt.replace_date(template.emis, date),
            'archive': dt.replace_date(archive.emis_file, date),
            'date': date
        }
        model_output_files['conc.' + ymd] = {
            'actual': dt.replace_date(cmaq_config.conc_file, date),
            'template': template.conc,
            'archive': dt.replace_date(archive.conc_file, date),
            'date': date
        }
        adjoint_forcing_files['force.' + ymd] = {
            'actual': dt.replace_date(cmaq_config.force_file, date),
            'template': template.force,
            'archive': dt.replace_date(archive.force_file, date),
            'date': date
        }
        sensitivity_files['emis.' + ymd] = {
            'actual': dt.replace_date(cmaq_config.emis_sense_file, date),
            'template': template.sense_emis,
            'archive': dt.replace_date(archive.sens_emis_file, date),
            'date': date
        }
        sensitivity_files['conc.' + ymd] = {
            'actual': dt.replace_date(cmaq_config.conc_sense_file, date),
            'template': template.sense_conc,
            'archive': dt.replace_date(archive.sens_conc_file, date),
            'date': date
        }
    return None
""" import os import numpy as np import datetime import matplotlib.pyplot as plt import context import fourdvar.user_driver as user import fourdvar.util.date_handle as dt obs = user.get_observed() nrow = obs.grid_attr['NROWS'] ncol = obs.grid_attr['NCOLS'] nday = len(dt.get_datelist()) tstep = obs.grid_attr['TSTEP'] tsec = (tstep // 10000) * 60 * 60 + ((tstep // 100) % 100) * 60 + tstep % 100 daysec = 24 * 60 * 60 nstep = daysec / tsec coverage = np.zeros(( nrow, ncol, ), dtype=int) obs_step = [] skipped_obs = 0 for weight in obs.weight_grid: #limit to obs the reach the surface
try:
    assert int(emis_nlay) == emis_nlay
    emis_nlay = int(emis_nlay)
except:
    print 'invalid emis_nlay'
    raise
if emis_nlay > enlay:
    raise AssertionError('emis_nlay must be <= {:}'.format(enlay))

# convert tstep into a valid time-step
if str(tstep).lower() == 'emis':
    efile = dt.replace_date(cmaq_config.emis_file, dt.start_date)
    estep = int(ncf.get_attr(efile, 'TSTEP'))
    tstep = [0, estep]
elif str(tstep).lower() == 'single':
    nday = len(dt.get_datelist())
    tstep = [nday, 0]
else:
    try:
        assert len(tstep) == 2
        day, hms = tstep
        assert int(day) == day
        day = int(day)
        assert int(hms) == hms
        hms = int(hms)
    except:
        print 'invalid tstep'
        raise

day, hms = tstep
daysec = 24 * 60 * 60
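# Illustrative note (not part of the script): tstep is handled as a [days, HHMMSS]
# pair, e.g. [0, 10000] for an hourly step, while the 'single' option becomes
# [len(dt.get_datelist()), 0], one step spanning the whole run. A HHMMSS value
# converts to seconds as below (10000 -> 3600 s); this is a hypothetical check only.
_hms = 10000
assert (_hms // 10000) * 3600 + ((_hms // 100) % 100) * 60 + _hms % 100 == 3600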
cmaq_config.sense_emis_lays = str(sense_lay)

# generate sample files by running 1 day of cmaq (fwd & bwd)
cmaq_handle.wipeout_fwd()
cmaq_handle.run_fwd_single(dt.start_date, is_first=True)
# make force file with same attr as conc and all data zeroed
conc_spcs = ncf.get_attr(conc_file, 'VAR-LIST').split()
conc_data = ncf.get_variable(conc_file, conc_spcs)
force_data = {k: np.zeros(v.shape) for k, v in conc_data.items()}
ncf.create_from_template(conc_file, force_file, force_data)
cmaq_handle.run_bwd_single(dt.start_date, is_first=True)

# create record for icon & emis files
fh.ensure_path(os.path.dirname(template.icon))
ncf.copy_compress(icon_file, template.icon)
for date in dt.get_datelist():
    emis_src = dt.replace_date(cmaq_config.emis_file, date)
    emis_dst = dt.replace_date(template.emis, date)
    fh.ensure_path(os.path.dirname(emis_dst))
    ncf.copy_compress(emis_src, emis_dst)

# create template for conc, force & sense files
fh.ensure_path(os.path.dirname(template.conc))
fh.ensure_path(os.path.dirname(template.force))
fh.ensure_path(os.path.dirname(template.sense_emis))
fh.ensure_path(os.path.dirname(template.sense_conc))
ncf.copy_compress(conc_file, template.conc)
ncf.copy_compress(force_file, template.force)
ncf.copy_compress(sense_emis_file, template.sense_emis)
ncf.copy_compress(sense_conc_file, template.sense_conc)