Example #1
def outputs2native(self,
                   data2dump,
                   input_type,
                   di,
                   df,
                   runsubdir,
                   mode="fwd",
                   dump=True):
    """Reads outputs to pycif objects.

    If the mode is 'fwd' or 'tl', only onservation-like outputs are extracted.
    For the 'adj' mode, all outputs relative to model sensitivity are extracted.

    Dumps to a NetCDF file with output concentrations if needed"""

    if mode in ["tl", "fwd"]:
        if not hasattr(self, "dataobs"):
            self.dataobs = init_empty()

        # Read simulated concentrations
        # In classical model, this should correspond to reading output files
        # Here the observations are already stored in the model object
        datastore = self.dataobs

        # Re-aggregate observations spanning several time steps
        # Obsvect divides by number of tstep at higher level
        # (in case the observation spans several periods)
        ds = datastore.groupby([datastore.index, "station", "i", "j"]).sum()
        ds = ds[["sim", "sim_tl"]]
        ds.index = ds.index.get_level_values(0)

        self.dataobs = ds

        return self.dataobs

    elif mode == "adj":
        if input_type != "fluxes":
            return data2dump

        # Reads sensitivities
        # In the toy model's case, just take the data from the object itself
        datasensit = self.dflx

        # TODO: generalize with several species
        spec = self.chemistry.acspecies.attributes[0]
        data2dump[("fluxes", spec)]["adj_out"] = datasensit

        return data2dump
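A stand-alone sketch of the re-aggregation step above, with a toy pandas
datastore (the column names come from the snippet; the data itself is
hypothetical):

import pandas as pd

# Toy datastore: the first observation is split over two model time steps
datastore = pd.DataFrame(
    {
        "station": ["STA1", "STA1", "STA2"],
        "i": [3, 3, 7],
        "j": [5, 5, 2],
        "sim": [0.4, 0.6, 1.2],
        "sim_tl": [0.04, 0.06, 0.12],
    },
    index=pd.to_datetime(3 * ["2019-01-01"]),
)

# Same pattern as outputs2native: sum the pieces of each observation
ds = datastore.groupby([datastore.index, "station", "i", "j"]).sum()
ds = ds[["sim", "sim_tl"]]
ds.index = ds.index.get_level_values(0)
print(ds)  # STA1 merged into one row: sim=1.0, sim_tl=0.1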
Example #2
    def ini_data(self, **kwargs):
        """Initializes the measurement plugin. This is pased on iterating over
        multiple species and providers

        Args:
            plugin (MeasurementPlugin): Measurement definition

        Returns:
            Updates the measurements on the fly
        """

        datei = self.datei
        datef = self.datef

        # If the measurement definition is empty in the Yaml,
        # return an empty datastore
        if not hasattr(self, "species"):
            self.datastore = init_empty()

        else:
            self.datastore = self.parse_tracers(datei, datef, **kwargs)
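The init_empty() helper is not shown in these snippets; a plausible minimal
version, assuming the datastore is simply an empty pandas DataFrame with a
datetime index (the exact column set is an assumption here, taken from the
fields the snippets read and write):

import pandas as pd

def init_empty():
    """Hypothetical sketch of pyCIF's init_empty: an empty datastore
    with the columns used by the examples above."""
    columns = ["station", "parameter", "lon", "lat", "alt",
               "duration", "sim", "sim_tl"]
    return pd.DataFrame(columns=columns, index=pd.DatetimeIndex([]))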
Example #3
def parse_tracers(self, datei, datef, file_monitor="", workdir="", **kwargs):
    """Parses all observation files related to the tracers specified as
    inputs

    Args:
        self (Measurement) : dictionary of tracers as defined in the Yaml
        file
        datei (datetime.datetime): initial date for the inversion window
        datef (datetime.datetime): end date for the inversion window
        file_monitor (str): file with pre-compiled observations, if it exists
        workdir (str): working directory
        logfile (str): path to the log file for verbose instances
        **kwargs (dictionary) : any additional argument that might be useful
                                for extracting observations.
                                Default contains config_dict

    Returns:
        dictionary : dictionary with all observations

    Notes:
        The data that are kept in the datastore are also saved in a
        monit_standard.nc file for debugging
    """

    # Dump type: default is nc
    self.dump_type = getattr(self, "dump_type", "nc")

    # If file_monitor is defined, tries reading it
    if hasattr(self, "file_monitor"):
        file_monitor = self.file_monitor

        try:
            info("Extracting measurements from {}".format(file_monitor))
            return dump.read_datastore(file_monitor,
                                       dump_type=self.dump_type,
                                       **kwargs)

        except IOError as e:
            info(str(e))
            info("Could not find a monitor file, reading observations")

        except Exception as e:
            info(str(e))
            info("Could not read the specified monitor "
                 "file: {}".format(file_monitor))
            raise e

    # Otherwise, create the monitor from observations
    if hasattr(self, "workdir"):
        workdir = self.workdir

    file_monitor = workdir + "/obs/monit_standard.nc"

    # If the measurement definition is empty in the Yaml,
    # return an empty datastore
    if not hasattr(self, "species"):
        return init_empty()

    # Loops through tracers if monitor not found
    path.init_dir(workdir + "/obs/")
    shutil.rmtree(file_monitor, ignore_errors=True)

    datastore = {}

    # Merging species datastores into one datastore
    for spec in self.species.attributes:
        specattr = getattr(self.species, spec)

        if hasattr(specattr, "format") and hasattr(specattr, "provider"):
            info("Extracting measurements for {}"
                 " with a single provider {}".format(spec, specattr.provider))

            parser = ObsParser.get_parser(specattr)
            datastore[spec] = parser.parse_multiple_files(spec=spec)
            datastore[spec] = datastore[spec].loc[str(datei):str(datef)]
            continue

        else:
            info("Extracting measurements for {} from multiple providers".
                 format(spec))

        # Looping over providers
        dataspec = {}
        for provider in specattr.attributes:
            # Get the observation parser
            providattr = getattr(specattr, provider)
            parser = ObsParser.get_parser(providattr)

            # Reading all files from provider
            dataspec[provider] = parser.parse_multiple_files(spec=spec)

            # Cropping to the inversion window
            dataspec[provider] = dataspec[provider].loc[str(datei):str(datef)]

        datastore[spec] = pd.concat(list(dataspec.values()))

    # Grouping species into a single datastore
    datastore = pd.concat(list(datastore.values()))

    # Dumping
    dump.dump_datastore(datastore,
                        file_monitor,
                        workdir,
                        dump_type=self.dump_type,
                        **kwargs)

    return datastore
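The per-provider cropping relies on pandas datetime slicing with string
bounds; a small illustration of that pattern on toy data (not pyCIF output):

import datetime
import pandas as pd

obs = pd.DataFrame(
    {"obs": [1.0, 2.0, 3.0, 4.0]},
    index=pd.date_range("2019-01-01", periods=4, freq="6H"),
)

datei = datetime.datetime(2019, 1, 1, 6)
datef = datetime.datetime(2019, 1, 1, 12)

# Same pattern as dataspec[provider].loc[str(datei):str(datef)]:
# slicing a DatetimeIndex with strings keeps both endpoints
print(obs.loc[str(datei):str(datef)])  # rows at 06:00 and 12:00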
Example #4
def init_y0(obsvect, measurements, **kwargs):
    """Initializes the observation vector. In most cases the observation
    vector is similar to the measurement vector but there is no reason
    for it to be the same, especially when super-observations are used
    (e.g. daily or afternoon averages, gradients, etc.)

    Args:
        obsvect (Plugin): the observation vector with all its attributes
        measurements (Plugin): the pre-loaded measurements

    Returns:
        obsvect, updated with horizontal and vertical coordinates,
        as well as model time steps

    """

    # Saves initialized observation vector to workdir/obsvect/monitor
    # Try linking file_obsvect to workdir/obsvect if existing
    # Otherwise, define it and generate it later on
    path.init_dir("{}/obsvect".format(obsvect.workdir))
    file_monitor = "{}/obsvect/monitor.{}".format(obsvect.workdir,
                                                  obsvect.dump_type)
    shutil.rmtree(file_monitor, ignore_errors=True)

    if os.path.isfile(obsvect.file_obsvect):
        path.link(obsvect.file_obsvect, file_monitor)

    # From here, no interaction with files outside the working directory
    obsvect.file_obsvect = file_monitor

    # Try reading file
    allcorrect, ok_hcoord, ok_vcoord, do_tstep = False, False, False, True
    exclude_stations = getattr(obsvect, "exclude_stat", [])
    try:
        # Reading a monitor file if available
        info("Try opening {}".format(file_monitor))
        obsvect.datastore = read_datastore(file_monitor,
                                           dump_type=obsvect.dump_type)

        # Check that the monitor is consistent with the simulation to be run
        # If True, returns directly the observation as is
        allcorrect, ok_hcoord, ok_vcoord, do_tstep = obsvect.check_monitor()
        if allcorrect and len(exclude_stations) == 0:
            return obsvect

        # Otherwise, recompute the necessary items (i, j, lev, tstep)
        # of the observation vector
        raise PluginError

    # If the monitor file could not be opened, start back from measurements
    except IOError:

        info("Couldn't open the observation file")
        info("Generating observation vector from measurements")

        # Copying attributes from measurements
        for key in measurements.attributes:
            setattr(obsvect, key, getattr(measurements, key))

        # Copying datastore from measurements if existing and not empty
        obsvect.datastore = getattr(measurements, "datastore", init_empty())

    except PluginError:
        info("Warning! The prescribed monitor file is not consistent with "
             "the current simulation. Re-analysing it")

    # Remove unwanted stations
    if len(exclude_stations) > 0:
        mask = ~(obsvect.datastore["station"].isin(exclude_stations))
        obsvect.datastore = obsvect.datastore.loc[mask]

    # Crops y0 to the simulation period
    obsvect.datastore = crop_monitor(obsvect.datastore, obsvect.datei,
                                     obsvect.datef, **kwargs)

    # Computes grid cells where observations fall
    if not ok_hcoord:
        obsvect = hcoord(obsvect, **kwargs)

    # Computes model layer where observations fall
    if not ok_vcoord:
        obsvect = vcoord(obsvect, **kwargs)

    # Compute time steps corresponding to the observations
    if do_tstep:
        obsvect = tstep(obsvect, **kwargs)

    # Dumps the datastore
    # Attributes to keep in the monitor if NetCDF format (recommended!)
    nc_attributes = {
        "datei": obsvect.datei.strftime("%d-%m-%Y %H:%M:%S"),
        "datef": obsvect.datef.strftime("%d-%m-%Y %H:%M:%S"),
        "model name": obsvect.model.plugin.name,
        "model version": obsvect.model.plugin.version,
        "domain nlon": str(obsvect.model.domain.nlon),
        "domain nlat": str(obsvect.model.domain.nlat),
    }

    if getattr(obsvect, "dump", True):
        dump_datastore(
            obsvect.datastore,
            file_monit=obsvect.file_obsvect,
            dump_type=obsvect.dump_type,
            nc_attributes=nc_attributes,
            mode="w",
        )

    return obsvect
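The station exclusion above is a plain isin mask; a minimal demonstration
(the station codes are made up):

import pandas as pd

datastore = pd.DataFrame({
    "station": ["MLO", "SPO", "BRW"],
    "obs": [410.1, 405.3, 412.8],
})
exclude_stations = ["SPO"]

# Same masking pattern as in init_y0
mask = ~(datastore["station"].isin(exclude_stations))
print(datastore.loc[mask])  # MLO and BRW rows kept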
Example #5
def outputs2native(self,
                   data2dump,
                   input_type,
                   di,
                   df,
                   runsubdir,
                   mode="fwd",
                   dump=True):
    """Reads outputs to pyCIF objects.

    If the mode is 'fwd' or 'tl', only observation-like outputs are extracted.
    For the 'adj' mode, all outputs relative to model sensitivity are extracted.

    Dumps to a NetCDF file with output concentrations if needed"""

    ddi = min(di, df)

    if mode in ["fwd", "tl"]:
        if not hasattr(self, "dataobs"):
            self.dataobs = init_empty()

        # If no simulated concentrations are available, just pass
        sim_file = "{}/mod.txt".format(runsubdir)
        if os.stat(sim_file).st_size == 0:
            info("CHIMERE ran without any observation "
                 "to be compared with for this sub-simulation; "
                 "only CHIMERE's outputs are available")
            self.dataobs.loc[:, "sim"] = np.nan
            return

        # Read simulated concentrations
        data = pd.read_csv(
            sim_file,
            delim_whitespace=True,
            header=None,
            usecols=range(6, 12),
            names=["sim", "pmid", "dp", "airm", "hlay", "simfwd"],
        )

        # Loop over observations in active species
        mask = (self.dataobs["parameter"].str.upper().isin(
            self.chemistry.acspecies.attributes))

        # Putting values to the local data store
        # Assumes arithmetic averages up to now
        # (sums here; obsvect divides by the number of time steps later)
        inds = [0] + list(np.cumsum(self.dataobs.loc[mask, "dtstep"][:-1]))

        column = "sim" if mode == "fwd" else "sim_tl"

        dataavg = pd.DataFrame([
            data.iloc[k:k + dt].sum()
            for k, dt in zip(inds, self.dataobs.loc[mask, "dtstep"])
        ])

        self.dataobs.loc[mask, column] = dataavg.loc[:, "sim"].values
        self.dataobs.loc[mask, "pressure"] = dataavg.loc[:, "pmid"].values
        self.dataobs.loc[mask, "dp"] = dataavg.loc[:, "dp"].values
        self.dataobs.loc[mask, "airm"] = dataavg.loc[:, "airm"].values
        self.dataobs.loc[mask, "hlay"] = dataavg.loc[:, "hlay"].values

        if column == "sim_tl":
            self.dataobs.loc[mask, "sim"] = dataavg.loc[:, "simfwd"].values

        return self.dataobs

    elif mode == "adj":
        # List of CHIMERE dates
        dref = datetime.datetime.strptime(
            os.path.basename(os.path.normpath(runsubdir)), "%Y-%m-%d_%H-%M")
        list_dates = self.input_dates[ddi]

        # Reading only output files related to given input_type
        ref_names = {
            "inicond": "ini",
            "fluxes": "aemis",
            "biofluxes": "bemis",
            "latcond": "bc",
            "topcond": "bc",
        }

        if input_type not in ref_names:
            return data2dump

        list_file = glob.glob("{}/aout.*{}*.nc".format(runsubdir,
                                                       ref_names[input_type]))

        for out_file in list_file:
            with Dataset(out_file, "r") as f:
                # Load list of species and reformat it
                list_species = [
                    b"".join(s).strip().decode("ascii")
                    for s in f.variables["species"][:]
                ]

                # Restrict to species required in data2dump
                dump_species = [
                    t[1].lower() for t in data2dump if t[0] == input_type
                ]
                if "" not in dump_species:
                    list_species = [
                        s for s in list_species if s.lower() in dump_species
                    ]

                else:
                    for s in list_species:
                        if (input_type, s) not in data2dump:
                            data2dump[(input_type, s)] = {}

                # Different output structure between LBC and others
                if "ini" in out_file or "emis" in out_file:
                    data = {s: f.variables[s][:] for s in list_species}

                elif "bc" in out_file:
                    data_lat = {
                        s: f.variables["lat_conc"][..., k]
                        for k, s in enumerate(list_species)
                    }
                    data_top = {
                        s: f.variables["top_conc"][..., k]
                        for k, s in enumerate(list_species)
                    }

            if "ini" in out_file:
                for spec in data:
                    data2dump[("inicond", spec)]["adj_out"] = xr.DataArray(
                        data[spec][np.newaxis, ...],
                        coords={"time": np.array([dref])},
                        dims=("time", "lev", "lat", "lon"),
                    )

            elif "bc" in out_file:
                if input_type == "latcond":
                    for spec in data_lat:
                        data2dump[("latcond", spec)]["adj_out"] = xr.DataArray(
                            data_lat[spec][..., np.newaxis, :],
                            coords={"time": list_dates},
                            dims=("time", "lev", "lat", "lon"),
                        )
                if input_type == "topcond":
                    for spec in data_top:
                        data2dump[("topcond", spec)]["adj_out"] = xr.DataArray(
                            data_top[spec][..., np.newaxis, :],
                            coords={"time": list_dates},
                            dims=("time", "lev", "lat", "lon"),
                        )

            elif "aemis" in out_file or "bemis" in out_file:
                if "aemis" in out_file:
                    emis_type = "fluxes"
                else:
                    emis_type = "biofluxes"

                for spec in data:
                    data2dump[(emis_type, spec)]["adj_out"] = xr.DataArray(
                        data[spec],
                        coords={"time": list_dates},
                        dims=("time", "lev", "lat", "lon"),
                    )

        return data2dump
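The 'fwd'/'tl' branch above sums blocks of consecutive model time steps per
observation using cumulated dtstep offsets; the trick in isolation, on toy
data (shapes assumed, not real CHIMERE output):

import numpy as np
import pandas as pd

# One model value per time step
data = pd.DataFrame({"sim": [1.0, 2.0, 3.0, 4.0, 5.0]})

# Each observation spans dtstep consecutive time steps
dtstep = pd.Series([2, 3])

# Starting row of each observation's block, as in outputs2native
inds = [0] + list(np.cumsum(dtstep[:-1]))

dataavg = pd.DataFrame([
    data.iloc[k:k + dt].sum() for k, dt in zip(inds, dtstep)
])
print(dataavg["sim"].values)  # [3. 12.]; obsvect divides by dtstep later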
Example #6
def outputs2native(self,
                   data2dump,
                   input_type,
                   di,
                   df,
                   runsubdir,
                   mode="fwd",
                   dump=True):
    """Reads outputs to pycif objects.

    If the mode is 'fwd' or 'tl', only observation-like outputs are extracted.
    For the 'adj' mode, all outputs relative to model sensitivity are extracted.

    Dumps to a NetCDF file with output concentrations if needed

    Args:
        self (pycif.utils.classes.models.Model): Model object
        runsubdir (str): current sub-simulation directory
        mode (str): running mode; one of 'fwd', 'tl', 'adj'; default is 'fwd'
        dump (bool): dumping outputs or not; default is True

    Returns:
        dict

    """

    ddi = min(di, df)
    ddf = max(di, df)

    if mode in ["tl", "fwd"]:
        if not hasattr(self, "dataobs"):
            self.dataobs = init_empty()

        # Read simulated concentrations
        sim_file = "{}/obs_out.bin".format(runsubdir)
        if not os.path.isfile(sim_file):
            self.dataobs.loc[:, "sim"] = np.nan
            return

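        # Assumed binary layout (inferred from the column assignments below):
        # the Fortran-order reshape means the file stores all 'sim' values,
        # then 'sim_tl', then pressure, then dp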
        sim = np.fromfile(sim_file, dtype="float").reshape((-1, 4), order="F")

        # Observations that were not extracted by LMDZ are set to NaN
        sim[sim == 0] = np.nan

        # Putting values to the local data store
        self.dataobs.loc[:, "sim"] = sim[:, 0]
        self.dataobs["pressure"] = sim[:, 2]
        self.dataobs["dp"] = sim[:, 3]

        if mode == "tl":
            self.dataobs.loc[:, "sim_tl"] = sim[:, 1]

        return self.dataobs

    elif mode == "adj":
        nlon = self.domain.nlon
        nlat = self.domain.nlat

        # Stores daily dates of the period for later aggregation
        dref = datetime.datetime.strptime(
            os.path.basename(os.path.normpath(runsubdir)), "%Y-%m-%d_%H-%M")
        list_dates = self.input_dates[ddi]

        # Reading only output files related to given input_type
        ref_names = {
            "inicond": "init",
            "fluxes": "fluxes",
            "prescrconcs": "scale",
            "prodloss3d": "prodscale",
        }

        if input_type not in ref_names:
            return data2dump

        list_file = glob.glob("{}/mod_{}_*_out.bin".format(
            runsubdir, ref_names[input_type]))
        specs2dump = [s[1] for s in data2dump]
        for out_file in list_file:
            spec = os.path.basename(out_file).split("_")[2]

            if spec not in specs2dump:
                continue

            with open(out_file, "rb") as f:
                data = np.fromfile(f, dtype=float)

            data = data.reshape((nlon, nlat, -1), order="F").transpose(
                (2, 1, 0))

            if input_type == "inicond":
                data2dump[("inicond", spec)]["adj_out"] = xr.DataArray(
                    data[np.newaxis, ...],
                    coords={"time": np.array([dref])},
                    dims=("time", "lev", "lat", "lon"),
                )
                continue

            # Adding one time stamp to fit with input dates
            # including the first stamp of the next month
            data = np.concatenate((data, np.zeros((1, nlat, nlon))), axis=0)
            data = data[:, np.newaxis, ...]
            data2dump[(input_type, spec)]["adj_out"] = xr.DataArray(
                data,
                coords={"time": list_dates},
                dims=("time", "lev", "lat", "lon"),
            )

        return data2dump
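The binary read in the 'adj' branch converts Fortran-ordered (lon, lat, time)
data into (time, lat, lon) arrays; the reshape/transpose pattern on a toy
array:

import numpy as np

nlon, nlat = 3, 2

# Fake flat binary content, as np.fromfile would return it
raw = np.arange(nlon * nlat * 4, dtype=float)

# Same pattern as in outputs2native: (lon, lat, time) -> (time, lat, lon)
data = raw.reshape((nlon, nlat, -1), order="F").transpose((2, 1, 0))
print(data.shape)  # (4, 2, 3), i.e. (time, lat, lon)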
Example #7
def parse_tracers(self, datei, datef, file_monitor="", workdir="", **kwargs):
    """Generate random observations at random locations in the domain"""

    # If file_monitor is defined, tries reading it
    if hasattr(self, "file_monitor"):
        file_monitor = self.file_monitor

        try:
            info("Extracting measurements from {}".format(file_monitor))
            return read_datastore(file_monitor,
                                  dump_type=getattr(self, "dump_type", "nc"),
                                  **kwargs)

        except IOError as e:
            info(str(e))
            info("Could not find a monitor file, reading observations")

        except Exception as e:
            info(str(e))
            info("Could not read the specified monitor "
                 "file: {}".format(file_monitor))
            raise e

    # Otherwise, create the monitor from observations
    else:
        if hasattr(self, "workdir"):
            workdir = self.workdir

        if file_monitor == "" or file_monitor is None:
            file_monitor = workdir + "/obs/monit_standard.nc"

    # Initialize empty datastore
    ds = init_empty()

    # Loop over species
    for trcr in self.species.attributes:
        tracer = getattr(self.species, trcr)

        # Make date range
        drange = pd.date_range(datei,
                               datef,
                               freq=getattr(tracer, "frequency", "1H"))
        ndates = drange.size

        # Pick random locations for x and y within the reference domain
        xmin = self.domain.zlon.min()
        xmax = self.domain.zlon.max()
        ymin = self.domain.zlat.min()
        ymax = self.domain.zlat.max()
        zmax = getattr(tracer, "zmax", 100)

        nstat = tracer.nstations

        statx = np.random.uniform(low=xmin, high=xmax, size=nstat)
        staty = np.random.uniform(low=ymin, high=ymax, size=nstat)
        statz = np.random.uniform(low=1, high=zmax, size=nstat)

        # Put locations into a monitor
        duration = pd.to_timedelta(getattr(tracer, "duration", "1H"))
        duration_hours = duration.total_seconds() / 3600.0

        df = pd.DataFrame({
            "alt": np.array(ndates * list(statz)),
            "lat": np.array(ndates * list(staty)),
            "lon": np.array(ndates * list(statx)),
            "station": ndates * list(range(nstat)),
            "parameter": trcr,
            "duration": seconds_duration,
        })

        df.index = (np.array(nstat * list(drange)).reshape(
            (ndates, nstat), order="F").flatten())
        if getattr(tracer, "random_subperiod_shift", False):
            df.index += np.random.uniform(0, 1, size=ndates * nstat) * duration

        # Appending datastore
        ds = pd.concat((ds, df), sort=False)

    # Dumping the datastore for later use by pyCIF
    dump_datastore(ds, file_monit=file_monitor, dump_type="nc")

    return ds
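The index construction above repeats each timestamp once per station; the
reshape trick can be checked in isolation:

import numpy as np
import pandas as pd

drange = pd.date_range("2019-01-01", periods=3, freq="1H")
ndates, nstat = drange.size, 2

# Same pattern as in parse_tracers
index = (np.array(nstat * list(drange))
         .reshape((ndates, nstat), order="F")
         .flatten())
print(index)  # each timestamp appears twice, once per station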