def ini_data(self, **kwargs):
    """Initializes the chemistry depending on the model used
    for the inversion.

    Args:
        self (ChemistryPlugin): chemistry definition

    Returns:
        Updates the chemistry on the fly
    """
    info("Initializing the Chemistry")

    # Copying the chemical scheme to the working directory
    workdir = self.workdir
    dirchem_ref = "{}/chemical_scheme/{}/".format(workdir, self.schemeid)
    self.dirchem_ref = dirchem_ref

    shutil.rmtree(dirchem_ref, ignore_errors=True)
    init_dir(dirchem_ref)

    # If a pre-computed scheme is specified
    if hasattr(self, "dir_precomp"):
        copy_tree("{}/{}/".format(self.dir_precomp, self.schemeid),
                  dirchem_ref)

        # Read chemistry
        self.read_chemicalscheme(**kwargs)

    # Otherwise, initialize files from the yaml
    else:
        self.create_chemicalscheme()

def ini_data(plugin, **kwargs):
    """Initializes the observation operator

    Args:
        plugin (dict): dictionary defining the plugin
        **kwargs (dictionary): possible extra parameters
    """
    info("Initializing the observation operator")

    workdir = plugin.workdir

    # Initializes the directory
    path.init_dir("{}/obsoperator".format(workdir))

    # Initializes transforms
    init_transform(plugin, plugin.statevect)
    init_transform(plugin, plugin.obsvect, transform_type="obs")

    # Re-compile model if necessary
    if hasattr(plugin.model, "compile"):
        plugin.model.compile()

    return plugin

def ini_data(plugin, **kwargs):
    """Initializes the regrid transform working directory."""
    dir_regrid = "{}/transforms/regrid/".format(plugin.workdir)
    plugin.dir_regrid = dir_regrid

    # Initializes reference directories if needed
    init_dir(dir_regrid)

def dump_hcorr(nlon, nlat, sigma_sea, sigma_land,
               evalues, evectors, dir_dump):
    """Dumps eigenvalues and vectors to a binary file.

    The default file format is:
    '{}/horcor_{}x{}_cs{}_cl{}_lmdz.bin'.format(
        dir_dump, nlon, nlat, sigma_sea, sigma_land)
    """
    ncell = evalues.size

    file_dump = "{}/horcor_{}x{}_cs{}_cl{}_lmdz.bin".format(
        dir_dump, nlon, nlat, sigma_sea, sigma_land)

    if os.path.isfile(file_dump):
        raise IOError("Warning: {} already exists. "
                      "I don't want to overwrite it".format(file_dump))

    datasave = np.concatenate((evalues[np.newaxis, :], evectors), axis=0)

    # Creating path if it does not exist
    if not os.path.isdir(os.path.dirname(file_dump)):
        init_dir(os.path.dirname(file_dump))

    # Saving data
    np.array(datasave).tofile(file_dump)

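# Illustrative sketch (not part of the original module): reading back a file
# written by dump_hcorr. It assumes the eigenvector matrix passed to
# dump_hcorr was (ncell, ncell) and that the arrays were float64, so the dump
# holds (ncell + 1) * ncell values.
def load_hcorr_example(file_dump, ncell):
    """Returns (evalues, evectors) from a dump_hcorr binary file."""
    import numpy as np
    data = np.fromfile(file_dump, dtype=np.float64).reshape(ncell + 1, ncell)
    return data[0], data[1:]
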
def ini_data(plugin, **kwargs):
    """Initializes the state vector from information in the Yaml file

    Args:
        plugin (pycif.classes.plugins): the plugin to initialize
    """
    # Initializes reference directories if needed
    init_dir("{}/statevect/".format(plugin.workdir))

    # Saves reference directories and file formats if not prescribed
    # Look for directory by order of priority:
    # 1) directly in tracer definition
    # 2) in component definition if specified
    # 3) in model fluxes if any
    # Getting the right emissions
    init_components(plugin)

    # Initializes the control vector
    plugin.init_xb(plugin, **kwargs)

    # TODO: fix general data structure to enable compatibility
    #  between control vector and transforms
    plugin.datastore = {}

    # Initializing the product of chi by B^(1/2), only if components specified
    if hasattr(plugin, "components"):
        plugin.init_bprod(plugin, **kwargs)

def fetch(ref_dir, ref_file, input_dates, target_dir, tracer=None,
          filetypes=["defstoke", "fluxstoke", "fluxstokev", "phystoke"],
          **kwargs):
    """Reads meteorology and links to the working directory

    Args:
        ref_dir (str): directory where the meteo files are stored
        ref_file (str): file name format for the meteo files
        input_dates (list[datetime.datetime]): dates for which meteo files
            are needed
        target_dir (str): path to the working directory where meteo files
            should be linked
        tracer: tracer definition if any
        filetypes ([str]): list of file radicals to copy in the working
            directory
        **kwargs (dictionary): extra arguments

    Return:
        (dict, dict): lists of linked files and corresponding dates,
        indexed by input date

    Notes:
        At some point, include option to compute mass fluxes for LMDz,
        with different physics. What is needed to do that?
        Possible only on CCRT? Flexibility to define new domains.
        Can be very heavy and not necessarily relevant.
    """
    info("Copying meteo files from {} to {}".format(ref_dir, target_dir))

    # Create the sub-directory to store meteo files
    path.init_dir(target_dir)

    # Loop over dates and file types
    for date in input_dates:
        for filetype in filetypes:
            meteo_file = "{}.an{}.m{:02d}.nc".format(
                filetype, date.year, date.month)

            if filetype == "defstoke" \
                    and not os.path.isfile(ref_dir + meteo_file):
                meteo_file = filetype + ".nc"

            target = "{}/{}".format(target_dir, meteo_file)
            source = "{}/{}".format(ref_dir, meteo_file)
            path.link(source, target)

    list_files = {datei: [] for datei in input_dates}
    list_dates = {datei: [] for datei in input_dates}

    return list_files, list_dates

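# Illustrative sketch (not part of the original module): the meteo file name
# that `fetch` links for a given file type and date, following the
# "{filetype}.an{year}.m{month:02d}.nc" convention used above.
def example_meteo_filename(filetype, date):
    """E.g. example_meteo_filename('fluxstoke', datetime.datetime(2019, 7, 1))
    returns 'fluxstoke.an2019.m07.nc'."""
    return "{}.an{}.m{:02d}.nc".format(filetype, date.year, date.month)
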
def ini_data(plugin, **kwargs):
    """Initializes the simulator: creates its working directory, removes
    cost function files from previous runs and sets up the SVD if required.
    """
    # Initializes the directory
    workdir = getattr(plugin, "workdir", "./")
    path.init_dir("{}/simulator".format(workdir))
    os.system("rm -f {}/simulator/cost.txt".format(workdir))
    os.system("rm -f {}/simulator/gradcost.txt".format(workdir))

    # Initializing Singular Value Decomposition if required
    plugin.do_svd = getattr(plugin, "do_svd", False)
    if plugin.do_svd:
        plugin.svd_vectors = svd_init(plugin.obsvect.datastore)

def ini_data(plugin, **kwargs):
    """Initializes CHIMERE

    Args:
        plugin (dict): dictionary defining the plugin
        **kwargs (dictionary): possible extra parameters

    Returns:
        loaded plugin and directory with executable
    """
    info("Initializing the model")

    workdir = getattr(plugin, "workdir", "./")

    # Initializes the directory
    path.init_dir("{}/model".format(workdir))

    # Default values:
    # period: '1D'
    plugin.periods = getattr(plugin, "periods", "1D")

    # Number of hours per period
    plugin.nhours = int(
        pd.to_timedelta(plugin.periods).total_seconds() // 3600)
    plugin.nho = "{:.0f}".format(plugin.nhours)

    # Replacing nsaveconcs if not specified
    # Forces the end.nc file to contain concentrations every N hours
    # By default, saves only at the end
    if not hasattr(plugin, "nsaveconcs"):
        plugin.nsaveconcs = plugin.nhours

    # Replace name for METEO files
    plugin.meteo.file = plugin.meteo.file.format(nho=plugin.nho)

    # Replace name for AEMISSIONS and BEMISSIONS files
    plugin.fluxes.file = plugin.fluxes.file.format(nho=plugin.nho)
    plugin.fluxes.nlevemis = plugin.nlevemis

    plugin.biofluxes.file = plugin.biofluxes.file.format(nho=plugin.nho)
    plugin.biofluxes.nlevemis = plugin.nlevemis_bio

    # Replace name for BOUN_CONCS files
    plugin.latcond.file = plugin.latcond.file.format(nho=plugin.nho)
    plugin.topcond.file = plugin.topcond.file.format(nho=plugin.nho)

    return plugin

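# Illustrative sketch (not part of the original module): how the `periods`
# string is turned into a number of hours per sub-simulation, as done above
# with pandas. The period strings in the docstring are examples only.
def example_period_to_hours(periods):
    """E.g. '1D' -> 24, '12H' -> 12."""
    import pandas as pd
    return int(pd.to_timedelta(periods).total_seconds() // 3600)
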
def ini_data(plugin, **kwargs):
    """Initializes the observation operator

    Args:
        plugin (dict): dictionary defining the plugin
        **kwargs (dictionary): possible extra parameters
    """
    verbose("Initializing the observation operator")

    workdir = plugin.workdir

    # Initializes the directory
    path.init_dir('{}/obsoperator'.format(workdir))

    return plugin

def ini_data(plugin, **kwargs):
    """Initializes the observation vector from information in the Yaml file

    Args:
        plugin (Plugin): the observation vector plugin to initialize
        **kwargs (dictionary): possible extra parameters
    """
    # Set dump type if not defined; default is nc
    if not hasattr(plugin, "dump_type"):
        plugin.dump_type = "nc"

    # Set default file_obsvect
    file_default = "{}/obsvect/monitor.{}".format(plugin.workdir,
                                                  plugin.dump_type)
    plugin.file_obsvect = getattr(plugin, "file_obsvect", file_default)

    # Keeping check_monitor as a class method
    plugin.check_monitor = MethodType(check_monitor, plugin)

    # Initializing y0
    measurements = plugin.measurements
    init_y0(plugin, measurements, **kwargs)

    # Initialize R if any observation
    if plugin.datastore.size > 0:
        init_rinvprod(plugin, measurements, **kwargs)

    # Link satellite files to the working directory
    if hasattr(plugin, "dir_satellites"):
        path.init_dir("{}/obsvect/satellites/".format(plugin.workdir))

        for dd in plugin.model.subsimu_dates[:-1]:
            sat_files = glob.glob(
                dd.strftime("{}/infos_*%Y%m%d%H%M.nc").format(
                    plugin.dir_satellites))

            for obs_file in sat_files:
                target = "{}/obsvect/satellites/{}".format(
                    plugin.workdir, os.path.basename(obs_file))
                path.link(obs_file, target)

    # Flag observations corresponding to satellite data (level < 0)
    plugin.has_satellites = False
    if np.where(plugin.datastore["level"] < 0)[0].size > 0:
        plugin.has_satellites = True

def ini_data(plugin, **kwargs):
    """Initializes the dummy_txt Gaussian model

    Args:
        plugin (Plugin): the model plugin to initialize
        **kwargs (dictionary): possible extra parameters

    Returns:
        loaded plugin and directory with executable
    """
    info("Initializing the model")

    workdir = getattr(plugin, "workdir", "./")

    # Initializes the directory
    path.init_dir("{}/model".format(workdir))

    # Copying the model Pasquill-Gifford matrix
    target = "{}/model/".format(workdir) + os.path.basename(plugin.file_pg)
    source = plugin.file_pg
    shutil.copy(source, target)

    # Required inputs for running a simulation
    plugin.required_inputs = ["fluxes", "meteo", "param"]

    # Initializes default values:
    # - sub-simulations of 1 day
    # - time steps of 1 hour
    plugin.periods = getattr(plugin, "periods", "1D")
    plugin.tstep = getattr(plugin, "tstep", "1H")

    plugin.save_H = getattr(plugin, "save_H", False)
    plugin.H_matrix = {}

    return plugin

def init_log(logfile, workdir, loglevel):
    """Initializes the log file for verbose outputs.

    Args:
        logfile (str): log file name
        workdir (str): directory where to create the logfile
        loglevel (int): level of verbosity. 1 for debug-level outputs,
            2 for standard outputs, above 2 for warnings only

    Returns:
        (str, str): (full path to the log file,
        absolute path to the working directory)

    Notes: Beware that the function overwrites any existing log file.
    """
    # Turning the path to absolute and creating the directory
    workdir, _ = path.init_dir(workdir)

    if not os.path.isabs(logfile):
        logfile = "{}/{}".format(workdir, logfile)

    # Beware that the log file is over-written anyway
    # Flushing if it exists
    open(logfile, "w").close()

    # Transform pycif verbose level to logging verbose level
    level = logging.DEBUG
    if loglevel == 2:
        level = logging.INFO
    elif loglevel > 2:
        level = logging.WARNING

    # Set up colored log
    stream_handler = logging.StreamHandler()
    formatter = ColorFormatter(fmt="#(level)%(message)s")
    stream_handler.setFormatter(formatter)

    file_handler = logging.FileHandler(logfile)
    formatter = ColorFormatter(
        fmt="%(asctime)s: %(message)s", datefmt="%Y-%m-%d %H:%M:%S"
    )
    file_handler.setFormatter(formatter)

    logger = logging.getLogger("")
    logger.setLevel(level)
    logger.addHandler(stream_handler)
    logger.addHandler(file_handler)

    return logfile, workdir

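# Illustrative usage sketch (not part of the original module): the run
# directory and log file name below are hypothetical.
def example_init_log():
    """Sets up the logger and writes a first message to the log file."""
    logfile, workdir = init_log("pycif.log", "./example_run", 1)
    logging.info("Logging initialised in %s (log file: %s)", workdir, logfile)
    return logfile
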
def dump(self, cntrl_file, to_netcdf=False, dir_netcdf=None, **kwargs):
    """Dumps a control vector into a pickle file.
    Does not save large correlations.

    Args:
        self (pycif.utils.classes.controlvects.ControlVect):
            the Control Vector to dump
        cntrl_file (str): path to the file to dump as pickle
        to_netcdf (bool): save to netcdf files if True
        dir_netcdf (str): root path for the netcdf directory
    """
    # Saving recursive attributes from the Yaml
    exclude = ["transform", "domain", "datastore", "input_dates",
               "obsvect", "tracer", "tstep_dates", "tstep_all", "dataflx"]
    tosave = self.to_dict(self, exclude_patterns=exclude)

    # Adding control vector values
    if hasattr(self, "x"):
        tosave["x"] = self.x
    if hasattr(self, "dx"):
        tosave["dx"] = self.dx
    if hasattr(self, "xb"):
        tosave["xb"] = self.xb

    # Dumping the dictionary to a pickle
    with open(cntrl_file, "wb") as f:
        pickle.dump(tosave, f, pickle.HIGHEST_PROTOCOL)

    # Dumping to an ensemble of NetCDF files
    if not to_netcdf or dir_netcdf is None:
        return

    components = self.components
    for comp in components.attributes:
        component = getattr(components, comp)

        dir_comp = "{}/{}".format(dir_netcdf, comp)
        init_dir(dir_comp)

        for trcr in component.parameters.attributes:
            tracer = getattr(component.parameters, trcr)

            # Translating x and xb to maps
            x = np.reshape(
                self.x[tracer.xpointer:tracer.xpointer + tracer.dim],
                (tracer.ndates, -1),
            )
            x = scale2map(x, tracer, tracer.dates, self.domain)

            xb = np.reshape(
                self.xb[tracer.xpointer:tracer.xpointer + tracer.dim],
                (tracer.ndates, -1),
            )
            xb = scale2map(xb, tracer, tracer.dates, self.domain)

            std = np.reshape(
                self.std[tracer.xpointer:tracer.xpointer + tracer.dim],
                (tracer.ndates, -1),
            )
            std = scale2map(std, tracer, tracer.dates, self.domain)

            ds = xr.Dataset({"x": x, "xb": xb, "std": std})
            ds.to_netcdf(
                "{}/statevect_{}_{}.nc".format(dir_comp, comp, trcr))

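# Illustrative sketch (not part of the original module): how a tracer's slice
# of the flat control vector is reshaped before being mapped to a grid, as
# done above for x, xb and std. Argument names are hypothetical.
def example_tracer_slice(x, xpointer, dim, ndates):
    """Returns the tracer slice as an (ndates, n_horizontal) array."""
    import numpy as np
    return np.reshape(x[xpointer:xpointer + dim], (ndates, -1))
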
def obsoper(self, inputs, mode,
            run_id=0,
            datei=datetime.datetime(1979, 1, 1),
            datef=datetime.datetime(2100, 1, 1),
            workdir='./',
            reload_results=False,
            **kwargs):
    """The observation operator.
    This function maps information from the control space to the
    observation space.

    This version of 'obsoper' was developed for use with FLEXPART
    backward runs.
    Gets concentrations/mixing ratios from FLEXPART flux sensitivities.
    For now, assumes FLEXPART runs are stored in the structure used by
    FLEXINVERT+, i.e., /station_id/YYYYMM/

    if FWD
        Turns model-equivalents back into observation space:
        Hx(native) -> Hx(obs)
        Generates departures and multiplies by R^(-1):
        -> Hx - y0, R^(-1).(Hx - y0)
        Calculates the gradient in observation space:
        Jo'(p) = H^T R^(-1) (H(p) - y)

    Args:
        inputs: can be a control vector or an observation vector,
            depending on the mode
        mode (str): the running mode; always 'fwd' for the flexpart version
        run_id (int): the ID number of the current run; is used to define
            the current sub-directory
        datei, datef (datetime.datetime): start and end dates of the
            simulation window
        workdir (str): the parent directory of the simulation to run
        reload_results (bool): look for results from pre-computed
            simulation if any

    Returns:
        observation or control vector depending on the running mode
    """
    # Check that inputs are consistent with the mode to run
    check_inputs(inputs, mode)

    # If true, do not run simulations at this point
    read_only = getattr(self, 'read_only', True)
    print("read_only", read_only)

    # Initializing modules and variables from the setup
    model = self.model
    controlvect = self.controlvect
    obsvect = self.obsvect
    subsimu_dates = model.subsimu_dates

    if mode == 'fwd':
        controlvect = inputs
        obsvect.datastore.loc[:, 'sim'] = np.nan

    # Various initializations

    # Get flexpart headers
    subdir = subsimu_dates[0].strftime("%Y%m")

    fp_header_glob = model.utils.flexpart_header.Flexpartheader()
    fp_header_glob.read_header(
        os.path.join(model.run_dir_glob,
                     obsvect.datastore.head(1)['station'][0].upper(),
                     subdir, 'header'))

    fp_header_nest = None
    if model.plugin.nested:
        fp_header_nest = model.utils.flexpart_header.Flexpartheader()
        fp_header_nest.read_header(
            os.path.join(model.run_dir_nest,
                         obsvect.datastore.head(1)['station'][0].upper(),
                         subdir, 'header_nest'))

    # Get the domain definition
    species = getattr(controlvect.components,
                      'fluxes').parameters.attributes[0]
    tracer = getattr(
        getattr(controlvect.components, 'fluxes').parameters, species)

    if tracer.hresol == 'regions':
        nbox = tracer.nregions

        # TODO: this will change once initial conditions are added (ciniopt)
        npvar = tracer.ndates * nbox
        ndvar = nbox
        nvar = npvar
        grad_o = np.zeros(nvar)

        ix1 = model.domain.ix1
        ix2 = model.domain.ix2
        iy1 = model.domain.iy1
        iy2 = model.domain.iy2
    else:
        raise Exception(
            "For FLEXPART, only hresol: regions is implemented "
            "in controlvect")

    # Loop through model periods and read model output
    self.missingperiod = False

    for di, df in zip(subsimu_dates[:-1], subsimu_dates[1:]):

        subdir = di.strftime("%Y%m")

        # Save to datastore for debugging purposes
        obs_ghg = np.empty(len(obsvect.datastore))
        obs_ghg[:] = np.nan
        obs_bkg = np.empty(len(obsvect.datastore))
        obs_bkg[:] = np.nan
        obs_sim = np.empty(len(obsvect.datastore))
        obs_sim[:] = np.nan
        obs_model = np.empty(len(obsvect.datastore))
        obs_model[:] = np.nan
        obs_check = np.empty(len(obsvect.datastore))
        obs_check[:] = np.nan

        obs_bkgerr = np.empty(len(obsvect.datastore))
        obs_bkgerr[:] = np.nan
        obs_err = np.empty(len(obsvect.datastore))
        obs_err[:] = np.nan

        # Loop over observation dates
        print("di, df", di, df,
              datetime.datetime.now())
        for obs_i, row in enumerate(obsvect.datastore.itertuples()):
            # For debugging
            obs_check[obs_i] = obs_i

            station = row.station
            runsubdir_nest = os.path.join(
                model.run_dir_nest, station.upper(), subdir)
            runsubdir_glob = os.path.join(
                model.run_dir_glob, station.upper(), subdir)
            file_date = row.Index.strftime('%Y%m%d%H%M%S')

            # Read nested grids
            file_name = 'grid_time_nest_' + file_date + '_001'
            if not os.path.isfile(os.path.join(runsubdir_nest, file_name)):
                continue

            grid_nest, gtime, ngrid = model.utils.read.read_flexpart_grid(
                runsubdir_nest, file_name, fp_header_nest,
                numscale=model.numscale)

            grid_nest *= model.coeff * model.mmair / model.molarmass

            # Read global footprints
            # TODO: read correction factor dry air
            file_name = 'grid_time_' + file_date + '_001'
            if not os.path.isfile(os.path.join(runsubdir_glob, file_name)):
                continue

            grid_glob, gtime_glob, ngrid_glob = \
                model.utils.read.read_flexpart_grid(
                    runsubdir_glob, file_name, fp_header_glob,
                    numscale=model.numscale)

            grid_glob *= model.coeff * model.mmair / model.molarmass

            # Array for global transport background
            hbkg = np.sum(grid_glob[:, :, 0:ngrid - 1], axis=2)
            hbkg[ix1:ix2, iy1:iy2] = 0.0

            # Index to state vector
            ind = np.argmin(np.abs(tracer.dates[0::-1] - gtime_glob[0]))
            obs_bkg[obs_i] = np.sum(
                hbkg[:, :].T * controlvect.flx_bkg[ind, :, :])
            obs_bkgerr[obs_i] = obs_bkg[obs_i] * tracer.err

            # Transport for boxes in regions
            hbox = np.zeros((nbox * ngrid), np.float64)
            mask_reg = tracer.regions > 0
            for n in range(ngrid):
                inds = n * nbox + tracer.regions[mask_reg] - 1
                np.add.at(hbox, inds, grid_nest.T[n, mask_reg])

            # for jy in range(fp_header_nest.numy):
            #     for ix in range(fp_header_nest.numx):
            #         if tracer.regions[jy, ix] > 0:
            #             hbox[n*nbox + tracer.regions[jy, ix] - 1] += \
            #                 grid_nest[ix, jy, n]

            # TODO: account for possible difference nested grid/inversion
            #  domain
            hnest = grid_nest

            istate = np.zeros(ngrid, dtype=int) - 1

            # Calculate indices to state vector
            for i, j in enumerate(gtime):
                if j > df:
                    istate[i] = -1
                else:
                    # Discard 1st tracer date in the comparison
                    mask = j - tracer.dates[1::] <= datetime.timedelta(0)
                    istate[i] = int(np.argmax(mask))

            if np.max(istate) < 0:
                continue

            # ntstep: number of inversion intervals covered by the footprints
            ntstep = int(
                np.max(istate[istate > -1])
                - np.min(istate[istate > -1]) + 1)

            hx = np.zeros(ntstep * nbox)
            px = np.zeros(ntstep * nbox)

            for i in range(ngrid):
                if istate[i] == -1:
                    continue

                ns = istate[i] - np.min(istate[istate > -1]) + 1
                px[(ns - 1) * nbox:ns * nbox] = \
                    controlvect.x[istate[i] * nbox:(istate[i] + 1) * nbox]
                hx[(ns - 1) * nbox:ns * nbox] += \
                    hbox[i * nbox:(i + 1) * nbox]

            obs_model[obs_i] = np.dot(hx, px)

            # Change in mixing ratio from best guess estimate
            obs_ghg[obs_i] = 0.
            for i, itim in enumerate(gtime):
                ind = np.argmin(np.abs(tracer.dates[0::-1] - itim))
                obs_ghg[obs_i] += np.sum(
                    hnest.T[i, :, :] * controlvect.flxall[ind, :, :])

            if getattr(tracer, 'offsets', False):
                # Optimize offsets
                obs_sim[obs_i] = obs_model[obs_i] \
                    + obs_ghg[obs_i] + obs_bkg[obs_i]
            else:
                # Optimize fluxes
                obs_sim[obs_i] = obs_model[obs_i] + obs_bkg[obs_i]

            # Calculate gradient in observation space
            # Jo'(p) = H^T R^-1 (H(p) - y)
            # calculated as: Jo'(p) = sum( H_i^T * ydel_i * R_i )
            # Contribution to observation gradient from obs_i
            departure = obs_sim[obs_i] - row.obs
            istate_uni = np.unique(istate).astype(int)

            for n in range(ntstep):
                if istate_uni[n] > -1:
                    grad_o[istate_uni[n] * ndvar:
                           (istate_uni[n] + 1) * ndvar] += \
                        hx[n * ndvar:(n + 1) * ndvar] \
                        * departure \
                        / (row.obserror ** 2 + obs_bkgerr[obs_i] ** 2)

            print(obs_i, row.obs, obs_sim[obs_i])

        obsvect.dx = grad_o

        # Add the different components to datastore
        obsvect.datastore['obs_bkgerr'] = obs_bkgerr
        obsvect.datastore['sim'] = obs_sim
        obsvect.datastore['obs_ghg'] = obs_ghg
        obsvect.datastore['obs_bkg'] = obs_bkg
        obsvect.datastore['obs_model'] = obs_model
        obsvect.datastore['obs_sim'] = obs_sim
        obsvect.datastore['obs_check'] = obs_check
        obsvect.datastore['obs_err'] = np.sqrt(
            obsvect.datastore['obs_bkgerr'] ** 2
            + obsvect.datastore['obserror'] ** 2)

        # Save grad_o for inspection
        rundir = "{}/obsoperator".format(workdir)
        file_grado = '{}/grad_o_{}.txt'.format(rundir, run_id)
        np.savetxt(file_grado, grad_o, fmt='%.8e')

        model.output_read = True

        created = os.path.isdir(runsubdir_nest)

        # If the sub-directory was already created,
        # the observation operator considers that the sub-simulation
        # was already properly run, thus passing to next sub-periods
        # Compute the sub-simulation anyway if some previous periods
        # were missing (as stored in self.missingperiod)
        do_simu = (created
                   or not getattr(self, 'autorestart', False)
                   or self.missingperiod) and not read_only
        self.missingperiod = do_simu

        # Some verbose
        # verbose("Running {} for the period {} to {}"
        #         .format(model.plugin.name, di, df))
        # verbose("Running mode: {}".format(mode))
        # verbose("Sub-directory: {}".format(runsubdir_nest))
        # print "do_simu", do_simu
        # print "read_only", read_only

        # Prepare observations for the model
        if not read_only:
            model.dataobs = obsvect.obsvect2native(
                di, df, mode, runsubdir_nest, workdir)

        # If only initializing inputs, continue to next sub-period
        if getattr(self, 'onlyinit', False):
            continue

        model.chain = min(di, df)

    # If only initializing inputs, exit
    if getattr(self, 'onlyinit', False):
        verbose("The run was correctly initialized")
        return

    # Re-initializing the chain argument
    if hasattr(model, 'chain'):
        del model.chain

    if mode in ['fwd']:
        controlvect.dump(
            "{}/controlvect_{}".format(rundir, run_id),
            to_netcdf=getattr(controlvect, 'save_out_netcdf', True),
            dir_netcdf='{}/controlvect/'.format(rundir),
            run_id=run_id)

    rundir = "{}/obsoperator/{}".format(workdir, mode)
    path.init_dir(rundir)

    dump_type = obsvect.dump_type
    dump_datastore(obsvect.datastore,
                   file_monit='{}/monitor_{}_.{}'.format(
                       rundir, run_id, dump_type),
                   mode='w', dump_type=dump_type,
                   col2dump=['obs_ghg', 'obs_bkg', 'obs_model', 'obs_sim',
                             'obs_check', 'obs_bkgerr', 'obs_err', 'obs_hx'])

    # Returning obsvect to the simulator
    if mode == 'fwd':
        return obsvect

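# Illustrative sketch (not part of the original module): the per-observation
# contribution to the observation-space gradient accumulated above,
# Jo'(p) = H^T R^(-1) (H(p) - y), with a scalar error variance made of the
# observation and background error terms. All argument names are hypothetical.
def example_gradient_contribution(hx, px, obs, obserror, bkgerr):
    """Returns the contribution of a single observation to grad_o."""
    import numpy as np
    departure = np.dot(hx, px) - obs
    return hx * departure / (obserror ** 2 + bkgerr ** 2)
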
def parse_tracers(self, datei, datef, file_monitor="", workdir="", **kwargs):
    """Parses all observation files related to the tracers specified
    as inputs

    Args:
        self (Measurement): dictionary of tracers as defined in the
            Yaml file
        datei (datetime.datetime): initial date for the inversion window
        datef (datetime.datetime): end date for the inversion window
        file_monitor (str): file with pre-compiled observations if it exists
        workdir (str): working directory
        **kwargs (dictionary): any additional argument that might be
            useful for extracting observations. Default contains config_dict

    Returns:
        dictionary: dictionary with all observations

    Notes: The data that are kept in the datastore are also saved in a
        monit_standard.nc file for debugging
    """
    # Dump type: default is nc
    self.dump_type = getattr(self, "dump_type", "nc")

    # If file_monitor is defined, tries reading it
    if hasattr(self, "file_monitor"):
        file_monitor = self.file_monitor
        try:
            info("Extracting measurements from {}".format(file_monitor))
            return dump.read_datastore(file_monitor,
                                       dump_type=self.dump_type,
                                       **kwargs)

        except IOError as e:
            info(str(e))
            info("Could not find a monitor file, reading observations")

        except Exception as e:
            info(e)
            info("Could not read the specified monitor file: {}".format(
                file_monitor))
            raise e

    # Otherwise, create the monitor from observations
    if hasattr(self, "workdir"):
        workdir = self.workdir

    file_monitor = workdir + "/obs/monit_standard.nc"

    # If the measurement definition is empty in the Yaml,
    # return an empty datastore
    if not hasattr(self, "species"):
        return init_empty()

    # Loops through tracers if monitor not found
    path.init_dir(workdir + "/obs/")
    shutil.rmtree(file_monitor, ignore_errors=True)

    datastore = {}

    # Merging species datastores into one data
    for spec in self.species.attributes:
        specattr = getattr(self.species, spec)

        if hasattr(specattr, "format") and hasattr(specattr, "provider"):
            info("Extracting measurements for {}"
                 " with a single provider {}".format(spec,
                                                     specattr.provider))
            parser = ObsParser.get_parser(specattr)
            datastore[spec] = parser.parse_multiple_files(spec=spec)
            datastore[spec] = datastore[spec].loc[str(datei):str(datef)]
            continue

        else:
            info("Extracting measurements for {} from multiple providers"
                 .format(spec))

        # Looping over providers
        dataspec = {}
        for provider in specattr.attributes:
            # Get the observation parser
            providattr = getattr(specattr, provider)
            parser = ObsParser.get_parser(providattr)

            # Reading all files from provider
            dataspec[provider] = parser.parse_multiple_files(spec=spec)

            # Cropping to the inversion window
            dataspec[provider] = \
                dataspec[provider].loc[str(datei):str(datef)]

        datastore[spec] = pd.concat(list(dataspec.values()))

    # Grouping species into a single datastore
    datastore = pd.concat(list(datastore.values()))

    # Dumping
    dump.dump_datastore(datastore, file_monitor, workdir,
                        dump_type=self.dump_type, **kwargs)

    return datastore

def init_y0(obsvect, measurements, **kwargs):
    """Initializes the observation vector. In most cases the observation
    vector is similar to the measurement vector, but there is no reason
    for it to be the same, especially when super-observations are used
    (e.g., daily or afternoon averages, gradients, etc.)

    Args:
        obsvect (Plugin): the observation vector with all its attributes
        measurements (Plugin): the pre-loaded measurements

    Returns:
        obsvect, updated with horizontal and vertical coordinates,
        as well as model time steps
    """
    # Saves initialized observation vector to workdir/obsvect/monitor
    # Try linking file_obsvect to workdir/obsvect if existing
    # Otherwise, define it and generate it later on
    path.init_dir("{}/obsvect".format(obsvect.workdir))
    file_monitor = "{}/obsvect/monitor.{}".format(obsvect.workdir,
                                                  obsvect.dump_type)
    shutil.rmtree(file_monitor, ignore_errors=True)

    if os.path.isfile(obsvect.file_obsvect):
        path.link(obsvect.file_obsvect, file_monitor)

    # From here, no interaction with files outside the working directory
    obsvect.file_obsvect = file_monitor

    # Try reading file
    allcorrect, ok_hcoord, ok_vcoord, do_tstep = False, False, False, True
    exclude_stations = getattr(obsvect, "exclude_stat", [])
    try:
        # Reading a monitor file if available
        info("Try opening {}".format(file_monitor))
        obsvect.datastore = read_datastore(file_monitor,
                                           dump_type=obsvect.dump_type)

        # Check that the monitor is consistent with the simulation to be run
        # If True, returns directly the observation as is
        allcorrect, ok_hcoord, ok_vcoord, do_tstep = obsvect.check_monitor()
        if allcorrect and len(exclude_stations) == 0:
            return obsvect

        # Otherwise, recompute necessary items (i, j, lev, tstep)
        # of the observation error
        raise PluginError

    # If the monitor file could not be opened, start back from measurements
    except IOError:
        info("Couldn't open the observation file")
        info("Generating observation vector from measurements")

        # Copying attributes from measurements
        for key in measurements.attributes:
            setattr(obsvect, key, getattr(measurements, key))

        # Copying datastore from measurements if existing and not empty
        obsvect.datastore = getattr(measurements, "datastore", init_empty())

    except PluginError:
        info("Warning! The prescribed monitor file is not consistent with "
             "the current simulation. Re-analysing it")

    # Remove unwanted stations
    if len(exclude_stations) > 0:
        mask = ~(obsvect.datastore["station"].isin(exclude_stations))
        obsvect.datastore = obsvect.datastore.loc[mask]

    # Crops y0 to the simulation period
    obsvect.datastore = crop_monitor(obsvect.datastore, obsvect.datei,
                                     obsvect.datef, **kwargs)

    # Computes grid cells where observations fall
    if not ok_hcoord:
        obsvect = hcoord(obsvect, **kwargs)

    # Computes model layer where observations fall
    if not ok_vcoord:
        obsvect = vcoord(obsvect, **kwargs)

    # Compute time steps corresponding to the observations
    if do_tstep:
        obsvect = tstep(obsvect, **kwargs)

    # Dumps the datastore
    # Attributes to keep in the monitor if NetCDF format (recommended!)
    nc_attributes = {
        "datei": obsvect.datei.strftime("%d-%m-%Y %H:%M:%S"),
        "datef": obsvect.datef.strftime("%d-%m-%Y %H:%M:%S"),
        "model name": obsvect.model.plugin.name,
        "model version": obsvect.model.plugin.version,
        "domain nlon": str(obsvect.model.domain.nlon),
        "domain nlat": str(obsvect.model.domain.nlat),
    }

    if getattr(obsvect, "dump", True):
        dump_datastore(
            obsvect.datastore,
            file_monit=obsvect.file_obsvect,
            dump_type=obsvect.dump_type,
            nc_attributes=nc_attributes,
            mode="w",
        )

    return obsvect

def obsoper(self, inputs, mode,
            run_id=0,
            datei=datetime.datetime(1979, 1, 1),
            datef=datetime.datetime(2100, 1, 1),
            workdir="./",
            reload_results=False,
            **kwargs):
    """The observation operator.
    This function maps information from the control space to the
    observation space and conversely, depending on the running mode.

    Generates model inputs from the control vector inputs(x)
    Turns observations into model-compatible extraction points,
    i.e. y0 (fwd) or dy* (adj)
    Runs the model to get model extractions, i.e. Hx (fwd) or H^T.dy* (adj)

    if FWD
        Turns model-equivalents back into observation space:
        Hx(native) -> Hx(obs)
        Generates departures and multiplies by R^(-1):
        -> Hx - y0, R^(-1).(Hx - y0)

    if ADJ
        Turns native increments to control space increments:
        H^T.dy* (native) -> H^T.dy* (control)
        Translates control increments to chi values:
        -> B^(1/2) . H^T.dy*

    Args:
        inputs: can be a control vector or an observation vector,
            depending on the mode
        mode (str): the running mode; should be one of 'fwd', 'tl' or 'adj'
        run_id (int): the ID number of the current run; is used to define
            the current sub-directory
        datei, datef (datetime.datetime): start and end dates of the
            simulation window
        workdir (str): the parent directory of the simulation to run
        reload_results (bool): look for results from pre-computed
            simulation if any

    Returns:
        observation or control vector depending on the running mode
    """
    # Check that inputs are consistent with the mode to run
    check_inputs(inputs, mode)

    # Create a sub-working directory for the present run
    rundir = "{}/obsoperator/{}_{:04d}/".format(workdir, mode, run_id)
    path.init_dir(rundir)

    # Create save directory for chaining sub-simulations
    path.init_dir("{}/chain/".format(rundir))

    # Return results from previous runs if existing
    if reload_results:
        if mode in ["fwd", "tl"]:
            try:
                # Saving the directory for possible later use by the adjoint
                self.model.adj_refdir = rundir

                # Trying to read the monitor if any
                obsvect = self.obsvect
                obsvect.datastore = read_datastore(
                    "{}/monitor.nc".format(rundir))
                return obsvect

            except IOError:
                info("There is no monitor file to be recovered. "
                     "Compute the full forward simulation")

        elif mode == "adj":
            try:
                statevect = self.statevect
                statevect.load("{}/statevect.pickle".format(rundir))
                return statevect

            except IOError:
                info("There is no statevect file to be recovered. "
                     "Compute the full adjoint simulation.")
" "Compute the full adjoint simulation.") # Initializing modules and variables from the setup model = self.model statevect = self.statevect obsvect = self.obsvect subsimu_dates = model.subsimu_dates if mode in ["fwd", "tl"]: obsvect.datastore.loc[:, "sim"] = 0.0 obsvect.datastore.loc[:, "sim_tl"] = 0.0 # Dumps control vector in forward and tl modes statevect.dump( "{}/statevect.pickle".format(rundir), to_netcdf=getattr(statevect, "save_out_netcdf", False), dir_netcdf="{}/statevect/".format(rundir), ) elif mode == "adj": obsvect = inputs statevect.dx = 0 * statevect.x subsimu_dates = subsimu_dates[::-1] # Loop through model periods and runs the model self.missingperiod = False for di, df in zip(subsimu_dates[:-1], subsimu_dates[1:]): # Create a sub-path for each period runsubdir = rundir + min(di, df).strftime("%Y-%m-%d_%H-%M") _, created = path.init_dir(runsubdir) # If the sub-directory was already created, # the observation operator considers that the sub-simulation # was already properly run, thus passing to next sub-periods # Compute the sub-simulation anyway if some previous periods # were missing (as stored in self.missingperiod) do_simu = (created or not getattr(self, "autorestart", False) or self.missingperiod) self.missingperiod = do_simu # Some verbose info("Running {} for the period {} to {}".format( model.plugin.name, di, df)) info("Running mode: {}".format(mode)) info("Sub-directory: {}".format(runsubdir)) # Prepare observation vector at model resolution model.dataobs = obsvect.obsvect2native(di, df, mode, runsubdir, workdir) model.dataobs = self.do_transforms( obsvect.transform, model.dataobs, obsvect.transform.mapper, "obs", di, df, mode, runsubdir, workdir, trans_mode="fwd", ) # If the simulation was already carried out, pass to next steps # If a sub-period was missing, following ones will be recomputed even # if available if do_simu: # Writing observations for on-line model extraction if any model.native2inputs(model.dataobs, "obs", di, df, runsubdir, mode) # Prepare inputs for the model and carry out on the fly transformations mapper = statevect.transform.mapper self.do_transforms( statevect.transform, statevect, mapper, "state", di, df, mode, runsubdir, workdir, trans_mode="fwd", do_simu=do_simu, onlyinit=getattr(self, "onlyinit", False), ) # If only initializing inputs, continue to next sub-period if getattr(self, "onlyinit", False): continue # Update observation vector if necessary if mode in ["fwd", "tl"] and obsvect.datastore.size > 0: # Read outputs model.outputs2native({}, "obs", di, df, runsubdir, mode) # Apply obs transformation and update obs datastore model.dataobs = self.do_transforms( obsvect.transform, model.dataobs, obsvect.transform.mapper, "obs", di, df, mode, runsubdir, workdir, trans_mode="inv", ) obsvect.native2obsvect(model.dataobs, di, df, runsubdir, workdir) # Update state vector if necessary elif mode == "adj": mapper = statevect.transform.mapper self.do_transforms( statevect.transform, statevect, mapper, "state", di, df, mode, runsubdir, workdir, trans_mode="inv", ) # Keep in memory the fact that it is (or not) a chained simulation model.chain = min(di, df) # If only initializing inputs, exit if getattr(self, "onlyinit", False): info("The run was correctly initialized") return # Re-initalizing the chain argument if hasattr(model, "chain"): del model.chain # Dump observation vector for later use in fwd and tl modes # Otherwise dumps the control vector if mode in ["fwd", "tl"]: dump_type = obsvect.dump_type dump_datastore( obsvect.datastore, 
file_monit="{}/monitor.{}".format(rundir, dump_type), mode="w", dump_type=dump_type, ) elif mode == "adj": statevect.dump("{}/statevect.pickle".format(rundir)) # Cleaning unnecessary files if getattr(model, "autoflush", False): info("Flushing unnecessary files in {}".format(rundir)) model.flushrun(rundir, mode) # Returning the output object depending on the running mode if mode in ["fwd", "tl"]: return obsvect if mode == "adj": return statevect
def create_chemicalscheme(self):
    """Read the mandatory chemistry files, create the optional ones
    and set the species attributes consistently with the data retrieved
    """
    # Common variables
    workdir = self.workdir
    mecachim = self.schemeid
    dirchem_ref = self.dirchem_ref

    # Initializes number of species and reactions
    self.nacspecies = (len(self.acspecies.attributes)
                       if hasattr(self, "acspecies") else 0)
    self.nemisspec = (len(self.emis_species.attributes)
                      if hasattr(self, "emis_species") else 0)
    self.nemisspec_interp = (len(self.emis_species_interp.attributes)
                             if hasattr(self, "emis_species_interp") else 0)
    self.nprspecies = (len(self.prescrconcs.attributes)
                       if hasattr(self, "prescrconcs") else 0)
    self.nprodspecies = (len(self.prodloss3d.attributes)
                         if hasattr(self, "prodloss3d") else 0)
    self.ndepspecies = (len(self.deposition.attributes)
                        if hasattr(self, "deposition") else 0)
    self.nreacs = (len(self.reactions.attributes)
                   if hasattr(self, "reactions") else 0)
    self.nfamilies = (len(self.families.attributes)
                      if hasattr(self, "families") else 0)

    # Cleaning the target directory
    os.system("rm -rf {}".format(dirchem_ref))
    init_dir(dirchem_ref)

    # List of files
    finf = "{}/chemical_scheme.nml".format(dirchem_ref)
    filer = "{}/REACTIONS.{}".format(dirchem_ref, mecachim)
    fileps = "{}/PRESCRIBED_SPECIES.{}".format(dirchem_ref, mecachim)
    filepl = "{}/PRODLOSS_SPECIES.{}".format(dirchem_ref, mecachim)
    filedp = "{}/DEPO_SPECIES.{}".format(dirchem_ref, mecachim)
    filea = "{}/ANTHROPIC.{}".format(dirchem_ref, mecachim)
    fileb = "{}/BIOGENIC.{}".format(dirchem_ref, mecachim)

    mandatory_files = [filer, fileps, filepl, filedp]
    create_mandchem(self, mandatory_files)

    files = "{}/STOICHIOMETRY.{}".format(dirchem_ref, mecachim)
    filec = "{}/CHEMISTRY.{}".format(dirchem_ref, mecachim)
    filerr = "{}/REACTION_RATES.{}".format(dirchem_ref, mecachim)
    filej = "{}/PHOTO_RATES.{}".format(dirchem_ref, mecachim)
    filef = "{}/FAMILIES.{}".format(dirchem_ref, mecachim)
    fileals = "{}/ALL_SPECIES.{}".format(dirchem_ref, mecachim)
    fileas = "{}/ACTIVE_SPECIES.{}".format(dirchem_ref, mecachim)

    nallqmax, nphoto_rates = create_optchem(self, filer, fileps)

    # Create chemical_scheme.nml namelist
    os.system('echo "&args" > {}'.format(finf))
    os.system("echo \"fnacspec = '{}'\" >> {}".format(fileas, finf))
    os.system("echo \"fnallspec = '{}'\" >> {}".format(fileals, finf))
    os.system("echo \"fnprescr = '{}'\" >> {}".format(fileps, finf))
    os.system("echo \"fnprodl = '{}'\" >> {}".format(filepl, finf))
    os.system("echo \"fndep = '{}'\" >> {}".format(filedp, finf))
    os.system("echo \"fnchem = '{}'\" >> {}".format(filec, finf))
    os.system("echo \"fnstoi = '{}'\" >> {}".format(files, finf))
    os.system("echo \"fnrates = '{}'\" >> {}".format(filerr, finf))
    os.system("echo \"fnjrates = '{}'\" >> {}".format(filej, finf))
    os.system("echo \"fnfamilies = '{}'\" >> {}".format(filef, finf))
    os.system('echo "iqmax = {}" >> {}'.format(self.nspecies, finf))
    os.system('echo "iallqmax = {}" >> {}'.format(nallqmax, finf))
    os.system('echo "iprescrmax = {}" >> {}'.format(self.nprspecies, finf))
    os.system('echo "iprodmax = {}" >> {}'.format(self.nprodspecies, finf))
    os.system('echo "idepmax = {}" >> {}'.format(self.ndepspecies, finf))
    os.system('echo "nreac = {}" >> {}'.format(self.nreacs, finf))
    os.system('echo "ijratesmax = {}" >> {}'.format(nphoto_rates, finf))
    os.system('echo "/" >> {}'.format(finf))

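# For reference, the chemical_scheme.nml namelist generated by the echo
# commands above has the following shape (paths and counts are placeholders):
#
#   &args
#   fnacspec = '<dirchem_ref>/ACTIVE_SPECIES.<schemeid>'
#   fnallspec = '<dirchem_ref>/ALL_SPECIES.<schemeid>'
#   fnprescr = '<dirchem_ref>/PRESCRIBED_SPECIES.<schemeid>'
#   fnprodl = '<dirchem_ref>/PRODLOSS_SPECIES.<schemeid>'
#   fndep = '<dirchem_ref>/DEPO_SPECIES.<schemeid>'
#   fnchem = '<dirchem_ref>/CHEMISTRY.<schemeid>'
#   fnstoi = '<dirchem_ref>/STOICHIOMETRY.<schemeid>'
#   fnrates = '<dirchem_ref>/REACTION_RATES.<schemeid>'
#   fnjrates = '<dirchem_ref>/PHOTO_RATES.<schemeid>'
#   fnfamilies = '<dirchem_ref>/FAMILIES.<schemeid>'
#   iqmax = <number of species>
#   iallqmax = <nallqmax>
#   iprescrmax = <number of prescribed species>
#   iprodmax = <number of prod/loss species>
#   idepmax = <number of deposited species>
#   nreac = <number of reactions>
#   ijratesmax = <number of photolysis rates>
#   /
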
def init_components(plugin):
    """Initializes every component/parameter of the state vector:
    working directories, read/fetch functions, input files and domains.
    """
    if hasattr(plugin, "components"):
        components = plugin.components
        for comp in components.attributes:
            component = getattr(components, comp)

            # Fetch parameters
            # If no parameters, handle the component as a whole
            if not hasattr(component, "parameters"):
                params = component
                parameters = [""]
            else:
                params = component.parameters
                parameters = params.attributes[:] + [""]

            # Loop over parameters to fetch information
            for trcr in parameters:
                tracer = getattr(params, trcr, component)

                # By default is not in the target vector
                tracer.iscontrol = False

                # Fetch reference directory and file format
                trac_dir = getattr(
                    tracer, "dir",
                    getattr(component, "dir",
                            getattr(components, "dir", "")),
                )
                trac_file = getattr(tracer, "file",
                                    getattr(component, "file", ""))
                tracer.varname = getattr(tracer, "varname", "")

                # Initializes target directory and pass info to tracer
                target_dir = "{}/statevect/{}/{}".format(
                    plugin.workdir, comp, trcr)
                init_dir(target_dir)

                tracer.dir = target_dir
                tracer.file = trac_file

                # Forces the tracer to have an empty read function
                if not hasattr(tracer, "read"):
                    tracer = tracer.get_subclass("fields")(plg_orig=tracer)

                if trcr != "":
                    setattr(component.parameters, trcr, tracer)
                else:
                    setattr(plugin.components, comp, tracer)

                # Gets read/fetch from model if not already defined
                # Passing the tracer to the read function for attribute access
                mod = plugin.model
                backups = list(mod.backup_comps.values())
                cmp = (comp if hasattr(mod, comp)
                       else (list(mod.backup_comps.keys())[
                                 backups.index(comp)]
                             if comp in backups else None))
                if cmp is None:
                    raise Exception(
                        "{} in your Yaml is not recognized as"
                        " a valid input for the model".format(comp))

                mod_comp = getattr(plugin.model, cmp)
                tracer.read = getattr(tracer, "read", mod_comp.read)
                tracer.fetch = getattr(
                    tracer, "fetch",
                    getattr(mod_comp, "fetch", default_fetch))

                for attr in ["file", "dir"]:
                    if getattr(tracer, attr, "") == "":
                        setattr(tracer, attr, getattr(mod_comp, attr, ""))

                # Fetch files and dates
                list_files, list_dates = tracer.fetch(
                    trac_dir,
                    trac_file,
                    plugin.model.input_dates,
                    target_dir,
                    component=component,
                    tracer=tracer,
                )
                tracer.input_dates = list_dates
                tracer.input_files = list_files

                # Saving tracer_dir into component dir
                # if not already available
                if not hasattr(component, "input_dates"):
                    component.input_dates = list_dates
                if not hasattr(component, "input_files"):
                    component.input_files = list_files

                # Get the domain and change it to the domain side
                # if lateral conditions
                if hasattr(tracer, "domain"):
                    continue

                if hasattr(tracer, "get_domain"):
                    tracer.domain = tracer.get_domain(
                        trac_dir,
                        trac_file,
                        plugin.model.input_dates,
                        target_dir,
                        tracer=tracer,
                    )
                else:
                    tracer.domain = plugin.domain

def ini_data(plugin, **kwargs):
    """Initializes LMDZ

    Args:
        plugin (Plugin): the model plugin to initialize
        **kwargs (dictionary): possible extra parameters

    Returns:
        loaded plugin and directory with executable
    """
    info("Initializing the model")

    workdir = getattr(plugin, "workdir", "./")

    # Cleaning the model working directory
    shutil.rmtree("{}/model/".format(workdir), ignore_errors=True)

    # Initializes the directory
    path.init_dir("{}/model".format(workdir))

    # Copying the executable
    target = "{}/model/".format(workdir) + os.path.basename(plugin.fileexec)
    source = plugin.fileexec
    shutil.copy(source, target)

    # Copying the definition file
    target = "{}/model/run.def".format(workdir)
    source = plugin.filedef
    shutil.copy(source, target)

    # LMDZ has a fixed integration time step
    plugin.tstep = 0

    # Initializes default values
    # Period of sub-simulations: default = 1 month
    if not hasattr(plugin, "periods"):
        plugin.periods = "1MS"

    # Convection scheme: default = 'TK' = Tiedtke
    if not hasattr(plugin, "conv_scheme"):
        plugin.conv_scheme = "TK"

    # Loading input fluxes if specified; otherwise, loads default inputs
    for spec in plugin.chemistry.emis_species.attributes:
        tracer = getattr(plugin.chemistry.emis_species, spec)

        if hasattr(tracer, "provider") and hasattr(tracer, "format"):
            name = tracer.provider
            version = tracer.format
        else:
            name = "LMDZ"
            version = "sflx"

        tracer = Setup.load_registered(name, version, "fluxes",
                                       plg_orig=tracer)
        Setup.load_setup(Setup.from_dict({"fluxes": tracer}), "fluxes",
                         level=1, **kwargs)

        setattr(plugin.chemistry.emis_species, spec, tracer)

    return plugin
