def evaluate_quantity(v, quantities,
                      stop_at=(numpy.ndarray, sympy.Number, numbers.Number)):
    """Evaluate numerical value of ``v`` using ``quantities``

    Get the numerical value of variable ``v``, using a dictionary of
    quantities ``quantities`` containing the values of other variables.
    The expression for ``v`` is looked up in the module-level
    ``expressions`` mapping; if ``v`` has no entry there, ``v`` itself
    is used as the expression.

    Parameters
    ----------
    v : sympy.Symbol
        Symbol of quantity to evaluate.
    quantities : Mapping[Symbol, Expr]
        Dictionary of previously estimated quantities.
    stop_at : Tuple[type], optional
        Types at which to stop considering arguments.  Defaults to
        ``(numpy.ndarray, sympy.Number, numbers.Number)``.  That means
        that those are retained in the value.  The same value is used
        for every nested evaluation of missing arguments.

    Returns
    -------
    object
        Numeric value of ``v``: a ``UADA`` scalar if the expression is
        a `sympy.Number`, the expression itself if it is an instance of
        ``stop_at``, and otherwise the result of evaluating the
        lambdified expression on the root-unit values of its free
        symbols.

    Raises
    ------
    ValueError
        If the expression is neither a number nor a ``stop_at``
        instance and has no arguments to evaluate.
    """
    e = expressions.get(v, v)

    values = {}
    for arg in typhon.physics.metrology.recursive_args(
            e, stop_at=(sympy.Symbol, sympy.Indexed)):
        try:
            values[arg] = quantities[arg]
        except KeyError:
            # If this fails, `arg` should be added to quantities.
            # Forward `stop_at` so nested evaluations follow the same
            # stopping rule as the top-level call.
            values[arg] = evaluate_quantity(arg, quantities, stop_at=stop_at)

    # substitute numerical values into expression
    if isinstance(e, sympy.Number):
        return UADA(float(e), name=names.get(v, str(v)), dims=(),
                    attrs={"units": str(units[v])})
    elif isinstance(e, stop_at):
        return e
    elif not e.args:
        raise ValueError("I don't know the value for: {!s}".format(e))
    else:
        smb = tuple(e.free_symbols)
        return sympy.lambdify(smb, e, dummify=False, modules=numpy)(
            *[values[x].to_root_units() for x in smb])
def _add_harm_for_iasi(self, harm, channel, ok):
    """Fill the sensor-1 (reference) harmonisation fields from IASI data.

    ``self.ds["ref_radiance"]`` contains IASI radiances; these are
    integrated over the RTTOV spectral response function for
    ``("metop_2", "hirs", channel)`` to simulate a HIRS radiance for
    MetOp-A.  The result and its accompanying uncertainty, time and
    diagnostic fields are written into ``harm`` in place.

    Parameters
    ----------
    harm : xarray.Dataset
        Harmonisation dataset to be filled; must already have a
        dimension ``M``.
    channel : int
        HIRS channel whose SRF is used.
    ok : indexer
        Selection of usable matchups, applied along ``line`` and used
        to index the diagnostic variables.
    """
    # X1: IASI spectrum integrated over the HIRS SRF
    freq = ureg.Quantity(numpy.loadtxt(self.hiasi.freqfile), ureg.Hz)
    selected = self.ds.isel(line=ok)
    specrad_f = UADA(selected["ref_radiance"]).to(rad_u["si"], "radiance")
    srf = typhon.physics.units.em.SRF.fromRTTOV("metop_2", "hirs", channel)
    L = srf.integrate_radiances(
        freq,
        ureg.Quantity(specrad_f.values, specrad_f.attrs["units"]))
    harm["X1"] = (("M", "m1"), L[:, numpy.newaxis].astype("f4"))

    # Ur1: uncertainties in the reference are not considered, so this
    # is arbitrarily put at 1‰ of X1.
    harm["Ur1"] = (("M", "m1"), (harm["X1"] * 0.001).astype("f4"))

    # Us1: all zeros
    n_matchups = harm.dims["M"]
    harm["Us1"] = (("M", "m1"), numpy.zeros((n_matchups, 1), dtype="f4"))

    # uncertainty_type1
    harm["uncertainty_type1"] = (("m1", ), numpy.array([1], dtype="i4"))

    # time1
    harm["time1"] = (("M", ), selected["mon_time"])

    # w-matrix and u-matrix are not used for this sensor
    for matrix_flag in ("w_matrix_use1", "u_matrix_use1"):
        harm[matrix_flag] = (("m1", ), numpy.array([0], dtype="i4"))

    # diagnostics (nominal_measurand fields are not filled here)
    harm["lon1"] = (("M", ), self.ds["ref_longitude"][ok])
    harm["lat1"] = (("M", ), self.ds["ref_latitude"][ok])
    harm["column1"] = (("M", ), self.ds["ref_column"][ok])
    harm["row1"] = (("M", ), self.ds["ref_row"][ok])
    harm["matchup_distance"] = ((), 0)
def estimate_band_coefficients(self, sat=None, instr=None, ch=None):
    """Estimate band coefficients for fast/explicit BT calculations

    In some circumstances, a fully integrated SRF may be more
    expensive than needed.  We can then choose an effective wavelength
    lambda_c along with coefficients alpha, beta such that instead of
    integrating, we estimate R = B(lambda*, T*), with T* = alpha + beta
    · T_B and lambda* a wavelength which may be close to the centroid
    lambda_c (but there is no guarantee).  Such an approximation
    eliminates the explicit use of an integral which can make analysis
    easier.

    The coefficients are not fitted here: they are read from the file
    configured as ``config.conf[instr]["band_file"]`` (formatted with
    ``sat``), and a `UserWarning` is issued to say so.  The file
    contents are reshaped to 19 channels × 7 shifts.  The reported
    uncertainties are the absolute differences between shift index 1
    and shift index 0.

    Parameters:

        sat (str): Satellite name, substituted into the band file path.
        instr (str): Instrument key into ``config.conf``.
        ch (int): Channel to select.

    Returns:

        alpha (float): Offset in approximation for T*
        beta (float): Slope in approximation for T*
        lambda_eff (float): Effective wavelength
        delta_alpha (float): Uncertainty in alpha
        delta_beta (float): Uncertainty in beta
        delta_lambda_eff (float): Uncertainty in lambda_eff
    """

    def with_units(da, unit):
        # wrap a data array as a units-aware array with the given units
        return UADA(da, attrs={"units": unit})

    warnings.warn("Obtaining band coefficients from file", UserWarning)
    band_file = config.conf[instr]["band_file"].format(sat=sat)
    pattern = r"(.{5,6})_ch(\d\d?)_shift([+-]\d+)pm\.nc\s+([\d.]+)\s+(-?[\de\-.]+)\s+([\d.]+)"
    record_dtype = [
        ("satname", "S6"),
        ("channel", "u1"),
        ("shift", "i2"),
        ("centre", "f4"),
        ("alpha", "f4"),
        ("beta", "f4"),
    ]
    table = numpy.fromregex(band_file, pattern, record_dtype).reshape(19, 7)

    dims = ("channel", "shiftno")
    ds = xarray.Dataset(
        {name: (dims, table[name])
         for name in ("centre", "alpha", "beta", "shift")},
        coords={"channel": table["channel"][:, 0]})
    ds = ds.sel(channel=ch)

    # NOTE(review): the original comment here read "varies 1.1 – 15.2 nm
    # depending on channel"; presumably this refers to the shift size.
    nominal = ds.sel(shiftno=0)
    difference = ds.sel(shiftno=1) - nominal

    lambda_c = with_units(nominal["centre"], "1/cm")
    alpha = with_units(nominal["alpha"], "K")
    beta = with_units(nominal["beta"], "1")

    delta_lambda_c = abs(with_units(difference["centre"], "1/cm"))
    delta_alpha = abs(with_units(difference["alpha"], "K"))
    delta_beta = abs(with_units(difference["beta"], "1"))

    return (alpha, beta, lambda_c,
            delta_alpha, delta_beta, delta_lambda_c)
def evaluate(self, ds, ch):
    """Apply self-emission model to data

    When fitting (and preferably testing) has been completed, apply
    the self-emission model to real data.  This method:

    - extracts the predictor from the source scanlines dataset
      (without recalculating the normalisation)
    - converts this to an array, keeping rows with missing values
    - estimates the predictand (space counts) for every row whose
      predictor values are all finite; other rows are left as NaN

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset containing the L1B scanlines for which the
        self-emission is to be estimated
    ch : int
        Channel for which to estimate self-emission.

    Returns
    -------
    X : xarray.Dataset
        Predictor that was used to evaluate
    Y : `typhon.physics.units.tools.UnitsAwareDataArray`
        Estimates of self-emission for all scanlines in ds, carrying
        the attributes of ``self.Y_ref``.
    """
    X = self.get_predictor(ds, ch, recalculate_norm=False)
    predictors = self._ds2ndarray(X, dropna=False)

    # start from all-NaN so rows that cannot be predicted stay flagged
    estimate = numpy.full((X.coords["time"].size,), numpy.nan, dtype="f4")
    usable = numpy.isfinite(predictors).all(1)
    model = self.models[ch]
    estimate[usable] = model.predict(predictors[usable, :]).squeeze()

    Y_pred = UADA(estimate,
                  coords=X["time"].coords,
                  attrs=self.Y_ref.attrs)
    return (X, Y_pred)
def set_covariance(self, other, channel, da_ch, _set_other=True):
    """Set covariance between this and other effect.

    The covariance is labelled with units, name and parameters, then
    stored in ``self._covariances`` under ``other.parameter``.  If a
    covariance for this pair already exists and already contains
    ``channel``, the stored value is kept and ``da_ch`` is not merged
    in; if it exists without ``channel``, ``da_ch`` is concatenated
    along ``calibrated_channel``.

    Parameters
    ----------
    other : Effect
        The other `Effect` that we have a covariance with.
    channel : int
        Channel to which ``da_ch`` applies; assigned as its
        ``calibrated_channel`` coordinate.
    da_ch : :class:`~typhon.physics.units.tools.UnitsAwareDataArray`
        The magnitude of the covariance.
    _set_other : bool, optional
        If True (default), also register the covariance on ``other``.
        The reciprocal call passes False so the two do not call each
        other indefinitely.
    """
    da_ch = UADA(da_ch).assign_coords(calibrated_channel=channel)

    props = _fcdr_defs.FCDR_data_vars_props
    joint_units = (props[self.name][2]["units"] *
                   props[other.name][2]["units"])
    da_ch.attrs["units"] = str(joint_units)
    da_ch.name = f"u_{self.name:s}_{other.name:s}"
    da_ch.attrs["short_name"] = da_ch.name
    da_ch.attrs["parameters"] = (str(self.parameter), str(other.parameter))

    existing = self._covariances.get(other.parameter)
    if existing is None:
        stored = da_ch
    elif channel in existing.coords["calibrated_channel"].values:
        # channel already present: keep what is stored
        stored = existing
    else:
        stored = xarray.concat([existing, da_ch],
                               dim="calibrated_channel",
                               compat="identical")
    self._covariances[other.parameter] = stored

    if _set_other:
        other.set_covariance(self, channel, da_ch, _set_other=False)
def plot_ds_summary_stats(ds, lab="", Ldb=None, write=False, filtered=False):
    """Plot statistics for enhanced matchup harmonisation file

    This function plots statistics for enhanced matchup harmonisation
    files, such as these:

    .. image:: /images/harmonisation-stats.png

    In addition, if ``write`` is ``True``, it also writes filter
    parameters --- see :func:`plot_hist_with_medmad_and_fitted_normal`
    for details.

    The resulting plot is written to a file.

    Parameters
    ----------
    ds : xarray.Dataset
        Dataset from which to plot summaries.  This dataset must
        correspond to the format as defined by Sam Hunt (W-matrix file)
        and as written out by `FCDR_HIRS.processing.analysis.merge_all`.
    lab : str, optional
        Additional ``debug`` label to describe the matchup file.  This
        can be empty for a standard plot, or have a string such as
        ``neighbours_delta_cm·mW m^-2 sr^-1``, which corresponds to the
        ``--debug`` option of the :ref:`combine-hirs-hirs-matchups`
        script.
    Ldb : xarray.Dataset, optional
        Dataset describing the HIRS-IASI dataset model, such as
        initialised by :meth:`matchups.init_Ldb`.  If None (default),
        the IASI-simulated histograms are left out of the radiance
        histogram panel.
    write : bool, optional
        Will be passed on to
        :func:`plot_hist_with_medmad_and_fitted_normal`; if True, write
        out filter parameters.  Defaults to False.
    filtered : bool, optional
        filtered or not, used for labelling

    Raises
    ------
    ValueError
        If no percentile range can be found for which the ratio between
        the largest and smallest absolute bound is below 100 for all
        plotted quantities.
    """

    if lab:
        # extra cruft added to string by combine_hirs_hirs_matchups
        lab = f"other_{lab:s}_"
    filterlab = "filtered" if filtered else "unfiltered"

    (f, ax_all) = matplotlib.pyplot.subplots(3, 5, figsize=(30, 15))

    # panels are consumed one by one, in row-major order
    g = ax_all.flat
    cbs = []

    chan = ds["channel"].item()

    K_forward = ds[f"K_{lab:s}forward"]
    K_backward = ds[f"K_{lab:s}backward"]
    K_sum = K_forward + K_backward
    K_units = K_forward.units

    # for unit conversions
    srf1 = typhon.physics.units.em.SRF.fromRTTOV(
        typhon.datasets.tovs.norm_tovs_name(ds.sensor_1_name, mode="RTTOV"),
        "hirs", chan)
    srf2 = typhon.physics.units.em.SRF.fromRTTOV(
        typhon.datasets.tovs.norm_tovs_name(ds.sensor_2_name, mode="RTTOV"),
        "hirs", chan)
    y1 = UADA(ds["nominal_measurand1"]).to(K_units, "radiance", srf=srf1)
    y2 = UADA(ds["nominal_measurand2"]).to(K_units, "radiance", srf=srf2)
    yb = [y1, y2]

    # Narrow the percentile window until no plotted range is dominated
    # by outliers.
    plo, phi = 1, 99
    while True:
        kxrange = scipy.stats.scoreatpercentile(K_forward, [plo, phi])
        kyrange = scipy.stats.scoreatpercentile(K_backward, [plo, phi])
        kΔrange = scipy.stats.scoreatpercentile(K_sum, [plo, phi])
        Lxrange = scipy.stats.scoreatpercentile(y1, [plo, phi])
        Lyrange = scipy.stats.scoreatpercentile(y2, [plo, phi])
        Lmax = max(Lxrange[1], Lyrange[1])
        Lmin = min(Lxrange[0], Lyrange[0])
        LΔrange = scipy.stats.scoreatpercentile(y2 - y1, [plo, phi])
        if all(max(abs(rng)) / min(abs(rng)) < 100
               for rng in (kxrange, kyrange, kΔrange,
                           Lxrange, Lyrange, LΔrange)):
            break
        plo += 4
        phi -= 4
        if not plo < phi:
            raise ValueError(
                "Can't retrieve a reasonable range, all outliers?!")

    # radiance comparison
    a = next(g)
    pc = a.hexbin(y1, y2, extent=(Lmin, Lmax, Lmin, Lmax), mincnt=1)
    a.plot([Lmin, Lmax], [Lmin, Lmax], 'k--')
    a.set_xlabel("Radiance {sensor_1_name:s}".format(**ds.attrs)
                 + f"[{y1.units:s}]")
    a.set_ylabel("Radiance {sensor_2_name:s}".format(**ds.attrs)
                 + f"[{y2.units:s}]")
    a.set_title("Radiance comparison")
    a.set_xlim(Lmin, Lmax)
    a.set_ylim(Lmin, Lmax)
    cbs.append(f.colorbar(pc, ax=a))

    # histograms for real and simulated measurements
    a = next(g)
    sensor_names = [ds.sensor_1_name, ds.sensor_2_name]
    for (y, sensor_name) in zip(yb, sensor_names):
        a.hist(y,
               label=f"{sensor_name:s} (measured)",
               histtype="step",
               range=(Lmin, Lmax),
               density=True,
               stacked=False,
               bins=100)
    # Ldb defaults to None; without it there is nothing simulated to show
    if Ldb is not None:
        for nm in Ldb.data_vars.keys():
            a.hist(Ldb[nm].sel(chan=chan),
                   label=f"{nm:s} (IASI-simulated)",
                   histtype="step",
                   range=(Lmin, Lmax),
                   density=True,
                   stacked=False,
                   bins=100)
    a.legend()
    a.set_xlabel("Radiance " + f"[{y1.units:s}]")
    a.set_ylabel("Density per bin")
    a.set_title("Histograms of radiances")

    # K forward vs. K backward
    a = next(g)
    pc = a.hexbin(K_forward, K_backward,
                  extent=numpy.concatenate([kxrange, kyrange]),
                  mincnt=1)
    a.plot(kxrange, -kxrange, 'k--')
    a.set_xlabel("K forward\n[{units:s}]".format(**K_forward.attrs))
    a.set_ylabel("K backward\n[{units:s}]".format(**K_backward.attrs))
    a.set_title("Estimating K forward or backward, comparison")
    a.set_xlim(kxrange)
    a.set_ylim(kyrange)
    cbs.append(f.colorbar(pc, ax=a))

    # histogram of K forward / backward differences
    a = next(g)
    (cnts, bins, patches) = a.hist(K_sum,
                                   histtype="step",
                                   bins=100,
                                   range=kΔrange)
    a.plot([0, 0], [0, cnts.max()], 'k--')
    a.set_xlabel("Sum of K estimates [{units:s}]".format(**K_forward.attrs))
    a.set_ylabel("No. matchups in bin")
    a.set_title("Distribution of sum of K estimates")
    a.set_xlim(kΔrange)

    # Ks vs. Kforward
    a = next(g)
    pc = a.hexbin(K_forward, K_sum,
                  extent=numpy.concatenate([kxrange, kΔrange]),
                  mincnt=1)
    a.plot(kxrange, [0, 0], 'k--')
    a.set_xlabel("K forward\n[{units:s}]".format(**K_forward.attrs))
    a.set_ylabel("Sum of K estimates [{units:s}]".format(**K_forward.attrs))
    a.set_title("K difference vs. K forward")
    a.set_xlim(kxrange)
    a.set_ylim(kΔrange)
    cbs.append(f.colorbar(pc, ax=a))

    # K vs. radiance
    a = next(g)
    pc = a.hexbin(y1, K_forward,
                  extent=numpy.concatenate([Lxrange, kxrange]),
                  mincnt=1)
    a.set_xlabel("Radiance {sensor_1_name:s}".format(**ds.attrs)
                 + f"[{y1.units:s}]")
    a.set_ylabel("K forward\n[{units:s}]".format(**K_forward.attrs))
    a.set_title("K vs. measurement")
    a.set_xlim(Lxrange)
    a.set_ylim(kxrange)
    cbs.append(f.colorbar(pc, ax=a))

    # K vs. ΔL
    a = next(g)
    extremes = [min([LΔrange[0], kxrange[0]]),
                max([LΔrange[1], kxrange[1]])]
    ΔL = y2 - y1
    pc = a.hexbin(ΔL, K_forward,
                  extent=numpy.concatenate([LΔrange, kxrange]),
                  mincnt=1)
    a.plot(extremes, extremes, 'k--')
    a.set_xlabel("Radiance {sensor_2_name:s} - {sensor_1_name:s}".format(
        **ds.attrs) + f"[{y1.units:s}]")
    a.set_ylabel("K forward\n[{units:s}]".format(**K_forward.attrs))
    a.set_title("K vs. measurement difference")
    a.set_xlim(LΔrange)
    a.set_ylim(kxrange)
    cbs.append(f.colorbar(pc, ax=a))

    # K - ΔL vs. radiance
    a = next(g)
    K_min_ΔL = K_forward - ΔL
    pc = a.hexbin(y1, K_min_ΔL,
                  extent=numpy.concatenate([[Lmin, Lmax],
                                            kxrange - LΔrange]),
                  mincnt=1)
    a.plot([0, Lmax], [0, 0], 'k--')
    a.set_xlabel("Radiance {sensor_1_name:s}".format(**ds.attrs)
                 + f"[{y1.units:s}]")
    # the label is already fully formatted by the f-string; a second
    # .format() pass would only risk misreading braces in the units
    a.set_ylabel(f"K - ΔL [{y1.units:s}]")
    a.set_xlim(Lmin, Lmax)
    a.set_ylim(sorted(kxrange - LΔrange))
    a.set_title('K "wrongness" per radiance')
    cbs.append(f.colorbar(pc, ax=a))

    # Kr / u_independent for both
    a = next(g)
    # awaiting having u_independent in files
    # Kr is converted to the K units by differencing the converted
    # (measurand + Kr) and the converted measurand.
    Kr_K = ((UADA(ds["nominal_measurand1"]) + UADA(ds["Kr"])).to(
                K_units, "radiance", srf=srf1)
            - UADA(ds["nominal_measurand1"]).to(
                K_units, "radiance", srf=srf1))
    Kr_K[Kr_K == 0] = 1e-4  # prevent zeroes
    Kr_K99 = min(scipy.stats.scoreatpercentile(Kr_K, 99),
                 10 * Kr_K.median().item())
    (cnts, bins, p1) = a.hist(
        Kr_K,
        histtype="step",
        bins=100,
        range=[0, Kr_K99])
    a.set_xlabel(f"Kr [{y1.units:s}]")
    a.set_ylabel("Count")
    a.set_xlim([0, Kr_K99])

    # now with u
    u1 = ds["nominal_measurand_uncertainty_independent1"]
    u2 = ds["nominal_measurand_uncertainty_independent2"]
    # workaround, I forgot to add units
    u1.attrs["units"] = ds["nominal_measurand1"].attrs["units"]
    u2.attrs["units"] = ds["nominal_measurand2"].attrs["units"]
    u1_K = ((UADA(ds["nominal_measurand1"]) + UADA(u1)).to(
                K_units, "radiance", srf=srf1)
            - UADA(ds["nominal_measurand1"]).to(
                K_units, "radiance", srf=srf1))
    u2_K = ((UADA(ds["nominal_measurand2"]) + UADA(u2)).to(
                K_units, "radiance", srf=srf2)
            - UADA(ds["nominal_measurand2"]).to(
                K_units, "radiance", srf=srf2))
    uj = numpy.sqrt(u1_K**2 + u2_K**2)
    uj99 = min(scipy.stats.scoreatpercentile(uj, 99),
               uj.median().item() * 10)
    Kr_K_uj = Kr_K / uj
    KrKuj99 = min(scipy.stats.scoreatpercentile(Kr_K / uj, 99),
                  Kr_K_uj.median().item() * 10)
    a2 = a.twiny()
    (cnts, bins, p2) = a2.hist(
        Kr_K_uj,
        histtype="step",
        bins=100,
        color="orange",
        range=[0, KrKuj99])
    a2.set_xlabel("Kr / u [1]")
    a2.set_xlim([0, KrKuj99])
    a.set_title("Histogram of Kr (normalised by joint noise level)",
                y=1.11)
    # colour each x-axis like the histogram it belongs to
    a.xaxis.label.set_color(p1[0].get_edgecolor())
    a2.xaxis.label.set_color(p2[0].get_edgecolor())
    a.tick_params(axis='x', colors=p1[0].get_edgecolor())
    a2.tick_params(axis='x', colors=p2[0].get_edgecolor())

    # K-ΔL simply histogram
    a = next(g)
    plot_hist_with_medmad_and_fitted_normal(
        a, K_min_ΔL, sorted(kxrange - LΔrange),
        f"K - ΔL [{y1.units:s}]", "Density", "K-ΔL",
        write="{sensor_1_name:s}_{sensor_2_name:s}/ch{channel:d}/{lab:s}/K_min_dL".format(
            channel=chan, lab=lab, **ds.attrs) if write else False)

    # Kr vs. K-ΔL hexbin
    a = next(g)
    pc = a.hexbin(Kr_K, K_min_ΔL,
                  extent=numpy.concatenate([[0, Kr_K99],
                                            sorted(kxrange - LΔrange)]),
                  mincnt=1)
    a.set_xlabel(f"Kr [{y1.units:s}]")
    a.set_ylabel(f"K - ΔL [{y1.units:s}]")
    a.set_title("Joint distribution Kr and K - ΔL")
    cbs.append(f.colorbar(pc, ax=a))

    # Kr vs. uncertainty
    a = next(g)
    pc = a.hexbin(Kr_K, uj,
                  extent=numpy.concatenate([[0, Kr_K99], [0, uj99]]),
                  mincnt=1)
    a.set_xlabel(f"Kr [{y1.units:s}]")
    a.set_ylabel(f"joint noise level [{y1.units:s}]")
    a.set_title("Joint distribution Kr and noise")
    # with some potential filters as lines
    x = numpy.array([0, Kr_K99])
    for (ft, c, s) in ((5, "red", ":"),
                       (25, "red", "--"),
                       (150, "cyan", ":"),
                       (750, "cyan", "--")):
        a.plot(x, x / ft, color=c, linewidth=2, linestyle=s,
               label="x/{:d} (removes {:.1%})".format(
                   ft, ((Kr_K_uj > ft).sum() / Kr_K.size).item()))
    a.legend()
    a.set_xlim([0, Kr_K99])
    a.set_ylim([0, uj99])
    cbs.append(f.colorbar(pc, ax=a))

    # ΔL/Kr, as suggested by Viju, see e-mail 2018-09-27
    a = next(g)
    plot_hist_with_medmad_and_fitted_normal(
        a, ΔL / Kr_K,
        scipy.stats.scoreatpercentile(ΔL / Kr_K, [plo, phi]),
        "ΔL/Kr [1]", "Density", "ΔL/Kr",
        write="{sensor_1_name:s}_{sensor_2_name:s}/ch{channel:d}/{lab:s}/dL_over_Kr".format(
            channel=chan, lab=lab, **ds.attrs) if write else False)

    # histogram of actually chosen K uncorrected
    a = next(g)
    a.hist(ds["K"],
           histtype="step",
           bins=100,
           density=True,
           range=scipy.stats.scoreatpercentile(ds["K"], [plo, phi]))
    a.set_xlabel("K [native units]")
    a.set_ylabel("density")
    a.set_title("Histogram of chosen K")

    for cb in cbs:
        cb.set_label("No. matchups in bin")

    for a in ax_all.flat:
        a.grid(axis="both")

    try:
        chanstr = ", ".join(
            str(c) for c in numpy.atleast_1d(
                K_forward.attrs["channels_prediction"]))
    except KeyError:
        # until commit 828bd13, I inconsistently mixed
        # "channels_prediction" and "channels_used"
        chanstr = ", ".join(
            str(c) for c in numpy.atleast_1d(
                K_forward.attrs["channels_used"]))

    f.suptitle(
        "K stats for pair {sensor_1_name:s}, {sensor_2_name:s}, {time_coverage:s}"
        .format(**ds.attrs)
        + ", channel " + str(chan) + ", " + lab
        + "\nchannels used to predict: " + chanstr)
    f.subplots_adjust(hspace=0.35, wspace=0.3)
    # in LSF some nodes have ascii filesystem encoding?!
    lab = lab.replace("·", "")

    graphics.print_or_show(
        f, False,
        "harmstats/{sensor_1_name:s}_{sensor_2_name:s}/ch{channel:d}/harmonisation_K_stats_{sensor_1_name:s}-{sensor_2_name:s}_ch{channel:d}_{time_coverage:s}_{lab:s}_{filterlab:s}."
        .format(channel=chan, lab=lab, filterlab=filterlab, **ds.attrs))
def plot_timeseries(self, ch, sp=28):
    """Plot time series of BT, radiance, their uncertainties and flags.

    Selects ``self.ds`` for one channel and one scan position, masks
    values flagged ``DO_NOT_USE`` at scanline, channel or pixel level,
    and draws an 8-row figure: brightness temperature, its
    uncertainties, the uncertainty components, a double-height panel
    with flag percentages, Earth radiance, its uncertainties, and a
    bottom row of hexbin plots of value against uncertainty.  The
    figure is passed to ``graphics.print_or_show``.

    If the selection contains no finite ``T_b`` at all, a warning is
    logged and nothing is plotted.

    Parameters
    ----------
    ch : int
        Channel to plot, selected as ``calibrated_channel``.
    sp : int, optional
        Value used to select both ``scanpos`` and ``minor_frame``.
        Defaults to 28.
    """
    counter = itertools.count()
    ds = self.ds.sel(calibrated_channel=ch, scanpos=sp, minor_frame=sp)

    # Check before creating the figure, so that skipping a channel does
    # not leave an unused figure open.
    if not numpy.isfinite(ds["T_b"]).any():
        logger.warning("Found no valid BTs for "
                       f"channel {ch:d}, skipping")
        return

    nrow = 8
    gs = matplotlib.gridspec.GridSpec(nrow, 4)
    fig = matplotlib.pyplot.figure(figsize=(18, 3 * nrow))

    # Each time-series row takes three columns, with the matching
    # histogram in the fourth.
    c = next(counter)
    a_tb = fig.add_subplot(gs[c, :3])
    a_tb_h = fig.add_subplot(gs[c, 3])

    c = next(counter)
    a_tb_u = fig.add_subplot(gs[c, :3])
    a_tb_u_h = fig.add_subplot(gs[c, 3])

    # components
    c = next(counter)
    a_tb_ucmp = fig.add_subplot(gs[c, :3])
    a_tb_ucmp_h = fig.add_subplot(gs[c, 3])

    bad = ((ds["quality_scanline_bitmask"]
            & _fcdr_defs.FlagsScanline.DO_NOT_USE)
           | (ds["quality_channel_bitmask"]
              & _fcdr_defs.FlagsChannel.DO_NOT_USE)
           | (ds["quality_pixel_bitmask"]
              & _fcdr_defs.FlagsPixel.DO_NOT_USE)) != 0
    # Assigning through a multi-variable selection of the dataset does
    # not work, so mask the fields one by one.
    for fld in {f for f in self.fields
                if f.startswith("u_") or f in {"T_b", "R_e"}}:
        ds[fld].loc[{"scanline_earth": bad}] = numpy.nan

    self._plot_var_with_unc(
        ds["T_b"],
        ds["u_T_b_random"],
        ds["u_T_b_nonrandom"],
        a_tb, a_tb_h, a_tb_u, a_tb_u_h)

    dsu = ds[[x for x in ds.data_vars.keys() if x.startswith("u_from_")]]
    self._plot_unc_comps(dsu, a_tb_ucmp, a_tb_ucmp_h)

    # flags
    c = next(counter)
    # although exact same width as other time series would be
    # desirable, the colourbar currently messes this up /anyway/, so
    # we might as well take the full width.  Use double height,
    # because there are many flags and each should be readable
    # individually.
    a_flags = fig.add_subplot(gs[c:c + 2, :])
    perc_all = []
    labels = []
    period = ("5min" if
              (ds["time"][-1] - ds["time"][0]).values.astype("m8[s]")
              < numpy.timedelta64(2, 'h')
              else "1H")
    for f in ("scanline", "channel", "minorframe", "pixel"):
        da = ds[f"quality_{f:s}_bitmask"]
        (perc, meanings) = common.sample_flags(da, period, "scanline_earth")
        perc_all.append(perc)
        labels.extend(f"{f:s}_{mean:s}" for mean in meanings)
    perc = xarray.concat(perc_all, dim="flag")
    # Zeros are deliberately not replaced by NaN: that causes trouble
    # when all values become NaN (no flags set during plotting period)
    # and there are real NaNs (no data in period).
    my_cmap = matplotlib.cm.get_cmap('cool')
    my_cmap.set_under("white")
    im = perc.T.plot.pcolormesh(ax=a_flags, cmap=my_cmap, vmin=0.0001)
    im.set_clim([0.0001, 1])
    a_flags.set_yticks(numpy.arange(len(labels)))
    a_flags.set_yticklabels(labels)
    a_flags.set_title("Percentage of flag set per hour")
    a_flags.grid(axis="x")
    # the flags panel spans two rows, so skip one row index
    next(counter)

    c = next(counter)
    a_L = fig.add_subplot(gs[c, :3])
    a_L_h = fig.add_subplot(gs[c, 3])

    c = next(counter)
    a_L_u = fig.add_subplot(gs[c, :3])
    a_L_u_h = fig.add_subplot(gs[c, 3])
    self._plot_var_with_unc(
        UADA(ds["R_e"]).to(rad_u["ir"], "radiance"),
        UADA(ds["u_R_Earth_random"]).to(rad_u["ir"], "radiance"),
        UADA(ds["u_R_Earth_nonrandom"]).to(rad_u["ir"], "radiance"),
        a_L, a_L_h, a_L_u, a_L_u_h)

    c = next(counter)
    self._plot_hexbin(ds["T_b"], ds["u_T_b_random"],
                      fig.add_subplot(gs[c, 0]))
    self._plot_hexbin(ds["T_b"], ds["u_T_b_nonrandom"],
                      fig.add_subplot(gs[c, 1]))
    self._plot_hexbin(
        UADA(ds["R_e"]).to(rad_u["ir"], "radiance"),
        UADA(ds["u_R_Earth_random"]).to(rad_u["ir"], "radiance"),
        fig.add_subplot(gs[c, 2]))
    self._plot_hexbin(
        UADA(ds["R_e"]).to(rad_u["ir"], "radiance"),
        UADA(ds["u_R_Earth_nonrandom"]).to(rad_u["ir"], "radiance"),
        fig.add_subplot(gs[c, 3]))
    # todo: colorbar

    fig.subplots_adjust(right=0.8, bottom=0.2, top=0.9,
                        hspace=1.0, wspace=0.4)

    for ax in fig.get_axes():
        for lab in ax.get_xticklabels():
            lab.set_visible(True)
            if ax.is_last_col() or ax.is_last_row():
                # workarounds for
                # https://github.com/matplotlib/matplotlib/issues/8509
                # as I don't want any histogram to lose its x-axis or
                # have rotated ticks
                lab.set_ha("center")
                lab.set_rotation(0)
            else:
                lab.set_rotation(30)
        if not ax.is_last_col() and not ax.is_last_row():
            ax.set_xlabel("Time")
        ax.grid(axis="both")

    # Each histogram shows the distribution of the quantity in the time
    # series next to it, so it takes that panel's y-label as x-label.
    a_tb_h.set_xlabel(a_tb.get_ylabel())
    a_tb_u_h.set_xlabel(a_tb_u.get_ylabel())
    a_L_h.set_xlabel(a_L.get_ylabel())
    a_L_u_h.set_xlabel(a_L_u.get_ylabel())

    tb = ds["time"].values[0].astype("M8[s]").astype(datetime.datetime)
    te = ds["time"].values[-1].astype("M8[s]").astype(datetime.datetime)
    fig.suptitle(self.figtit.format(tb=tb, te=te, self=self, ch=ch, sp=sp))
    graphics.print_or_show(
        fig, False,
        self.figname.format(tb=tb, te=te, self=self, ch=ch))
#: Effect describing uncertainty due to IWCT type b IWCT_type_b = Effect(name="O_TPRT", description="IWCT type B", parameter=sympy.IndexedBase( meq.symbols["O_TPRT"])[meq.symbols["n"], meq.symbols["m"]], correlation_type=_systematic, correlation_scale=_inf, unit=ureg.K, dimensions=(), channel_correlations=_ones, rmodel=rmodel_calib_prt) # set magnitude when I'm sure everything else has been set (order of # kwargs not preserved before Python 3.6) IWCT_type_b.magnitude = UADA(0.1, name="uncertainty", attrs={"units": "K"}) #: Helper for effects correlated within same detector _blockmat = numpy.vstack( (numpy.hstack((numpy.ones(shape=(12, 12)), numpy.zeros(shape=(12, 9)))), numpy.hstack((numpy.zeros(shape=(9, 12)), numpy.ones(shape=(9, 9)))))) #: Effect describing uncertainty due to non-linearity harmonsiation parameter nonlinearity = Effect(name="a_2", description="Nonlinearity", parameter=meq.symbols["a_2"], correlation_type=_systematic, correlation_scale=_inf, unit=radiance_units["si"] / ureg.count**2, dimensions=(), channel_correlations=_blockmat,
def create_summary(self, start_date, end_date, fields=None,
                   fcdr_type="debug", field_ranges=None):
    """Create and store daily percentile and histogram summaries.

    For every day from ``start_date`` up to and including ``end_date``,
    read the FCDR through ``self.hirs.read_period``, mask bad data, and
    store per-channel percentiles (``self.ptiles``) and histograms
    (``self.nbins`` bins plus one underflow and one overflow bin) for
    each field.  Days that cannot be read are skipped with a warning.
    The resulting dataset is written to a NetCDF file below
    ``self.basedir / self.subdir``.

    Parameters
    ----------
    start_date : datetime.datetime
        First day to summarise.
    end_date : datetime.datetime
        Last day to summarise.
    fields : List[str], optional
        Fields to summarise.  The fields in ``self.fields[fcdr_type]``
        that are not yet listed are added.  The list passed in is not
        modified.
    fcdr_type : str, optional
        Either "debug" (default) or "easy".
    field_ranges : Mapping[str, Tuple[float, float]], optional
        Per-field ``(lo, hi)`` histogram ranges that override the
        ones in ``self.hist_range``.
    """
    dates = pandas.date_range(start_date,
                              end_date + datetime.timedelta(days=1),
                              freq="D")
    # Work on a copy: the caller's list must not grow as a side effect.
    fields = list(fields) if fields is not None else []
    fields.extend([f for f in self.fields[fcdr_type] if f not in fields])
    logger.debug("Summarising fields: %s", " ".join(fields))
    if field_ranges is None:
        field_ranges = {}
    chandim = "channel" if fcdr_type == "easy" else "calibrated_channel"
    channels = numpy.arange(1, 20)
    hist_range = self.hist_range.copy()
    for (field, (lo, hi)) in field_ranges.items():
        hist_range[field] = ("edges", [lo, hi])

    def bin_edges(field, ch):
        # nbins regular edges between the field's limits, preceded and
        # followed by one wide catch-all edge
        lo = hist_range.sel(channel=ch, edges=0)[field]
        hi = hist_range.sel(channel=ch, edges=1)[field]
        return numpy.concatenate([
            [min(lo - 1, 0)],
            numpy.linspace(lo, hi, self.nbins, dtype="f4"),
            [max(hi + 1, 1000)]])

    n_days = dates.size - 1
    summary = xarray.Dataset(
        {
            **{field: (
                ("date", "ptile", "channel"),
                numpy.full((n_days, self.ptiles.size, channels.size),
                           numpy.nan, dtype="f4"))
               for field in fields},
            **{f"hist_{field:s}": (
                ("date", "bin_index", "channel"),
                numpy.zeros((n_days, self.nbins + 1, channels.size),
                            dtype="u4"))
               for field in fields},
            **{f"bins_{field:s}": (
                ("channel", "bin_edges"),
                numpy.concatenate(
                    [[bin_edges(field, ch)] for ch in channels]))
               for field in fields},
        },
        coords={"date": dates[:-1],
                "ptile": self.ptiles,
                "channel": channels})

    for (sd, ed) in zip(dates[:-1], dates[1:]):
        try:
            ds = self.hirs.read_period(
                sd, ed,
                onerror="skip",
                excs=inspect.signature(
                    self.hirs.read_period).parameters["excs"].default
                + (KeyError, OSError),
                locator_args={"data_version": self.data_version,
                              "format_version": self.format_version,
                              "fcdr_type": fcdr_type},
                fields=fields + [f for f in self.extra_fields[fcdr_type]
                                 if f not in fields])
            if fcdr_type == "easy" and ds["u_structured"].dims == ():
                raise DataFileError(
                    "See https://github.com/FIDUCEO/FCDR_HIRS/issues/171")
        except DataFileError as e:
            logger.warning(
                "Could not read "
                f"{sd:%Y-%m-%d}--{ed:%Y-%m-%d}: {e!r}: {e.args[0]:s}")
            continue

        # bit 0 of the scanline and channel bitmasks marks unusable
        # data; values with large relative uncertainty are bad too
        if fcdr_type == "debug":
            bad = ((2 * ds["u_R_Earth_nonrandom"] > ds["R_e"])
                   | ((ds["quality_scanline_bitmask"] & 1) != 0)
                   | ((ds["quality_channel_bitmask"] & 1) != 0))
        else:  # should be "easy"
            bad = ((2 * ds["u_structured"] > ds["bt"])
                   | ((ds["quality_scanline_bitmask"].astype("uint8")
                       & 1) != 0)
                   | ((ds["quality_channel_bitmask"].astype("uint8")
                       & 1) != 0))

        for field in fields:
            if field != "u_C_Earth":
                # workaround for
                # https://github.com/FIDUCEO/FCDR_HIRS/issues/152
                try:
                    ds[field].values[
                        bad.transpose(*ds[field].dims).values] = numpy.nan
                except ValueError:
                    # deliberate best effort: this field cannot be
                    # masked with `bad`, so it is left unmasked
                    pass

        for field in fields:
            if "hertz" in ds[field].units:
                da = UADA(ds[field]).to(rad_u["ir"], "radiance")
            else:
                da = ds[field]
            if not da.notnull().any():
                # hopeless
                logger.warning(
                    f"All bad data for {self.satname:s} "
                    f"{sd.year:d}-{sd.month:d}-{sd.day:d}–{ed.year:d}-{ed.month:d}-{ed.day}, not "
                    f"summarising {field:s}.")
                continue
            # cannot apply limits with scipy.stats.scoreatpercentile,
            # see https://github.com/scipy/scipy/issues/7342
            # and need to mask nans, see
            # https://github.com/scipy/scipy/issues/2178
            # take transpose as workaround for
            # https://github.com/FIDUCEO/FCDR_HIRS/issues/152
            # make sure we always reshape the same way... this causes
            # both #172 and #173
            pt = scipy.stats.mstats.mquantiles(
                numpy.ma.masked_invalid(
                    (da.transpose("channel", "x", "y")
                     if fcdr_type == "easy"
                     else da).values.reshape(channels.size, -1)),
                prob=self.ptiles / 100, axis=1,
                alphap=0, betap=1).T
            summary[field].loc[{"date": sd}] = pt
            for ch in range(1, 20):
                summary[f"hist_{field:s}"].loc[
                    {"date": sd, "channel": ch}] = numpy.histogram(
                        da.loc[{chandim: ch}],
                        bins=summary[f"bins_{field:s}"].sel(channel=ch),
                        range=(da.min(), da.max()))[0]

    of = pathlib.Path(self.basedir) / self.subdir / self.stored_name
    of = pathlib.Path(str(of).format(
        satname=self.satname,
        year=dates[0].year,
        month=dates[0].month,
        day=dates[0].day,
        year_end=dates[-2].year,
        month_end=dates[-2].month,
        day_end=dates[-2].day,
        fcdr_version=self.data_version,
        format_version=self.format_version,
        fcdr_type=fcdr_type))
    of.parent.mkdir(parents=True, exist_ok=True)

    # compress every variable
    for field in fields:
        summary[field].encoding.update({
            "zlib": True,
            "complevel": 4})
        summary["hist_" + field].encoding.update({
            "zlib": True,
            "complevel": 4})
        summary["bins_" + field].encoding.update({
            "zlib": True,
            "complevel": 4})
    logger.info(f"Writing {of!s}")
    summary.to_netcdf(str(of))
def evaluate(self, ds, ch):
    """Apply self-emission model to data

    Does a simple linear interpolation of the Rself value from
    surrounding calibration lines.  Rself will be zero at the first
    calibration time and will be estimated at the second calibration
    time to match the observed space view signal given the gain etc.
    The values will then be interpolated in time to generate the new
    values of Rself.  The final curves will give a sawtooth variation
    starting near zero to a maximum just before the next calibration
    observation.

    This method:

    - extracts the predictor from the source scanlines dataset (time)
    - converts this to the right format, including masking bad lines
    - estimate the predictand (space counts) for all lines assuming
      a linear model

    Parameters
    ----------

    ds : xarray.Dataset
        Dataset containing the L1B scanlines for which the
        self-emission is to be estimated
    ch : int
        Channel for which to estimate self-emission.

    Returns
    -------

    rself_time : list
        Two-element list holding the first and the last time of the
        Earth-view scanlines in ``ds``.
    X : xarray.Dataset
        Merged temperature predictors, taken from the scanlines where
        a space view is followed by an IWCT view.
    Y_pred : `typhon.physics.units.tools.UnitsAwareDataArray`
        Estimates of self-emission for the Earth-view scanlines in
        ``ds``.
    uRself : numpy scalar
        Mean over all Earth-view scanlines of the estimated
        self-emission uncertainty.

    Raises
    ------

    ValueError
        If offset and slope are not finite at the same positions.
    """

    def temperature_radiances(source):
        """Return ``(predictors, radiances)`` for the lines in ``source``.

        ``predictors`` is the list of per-temperature time series (any
        dimension other than time averaged out); ``radiances`` holds
        the matching channel radiances with dimensions
        ``(ntemp, time)``.
        """
        predictors = []
        rows = []
        for t_fld in self.temperatures:
            t_fld = _tovs_defs.temperature_names.get(t_fld, t_fld)
            x = source["temperature_{:s}".format(t_fld)]
            for dim in set(x.dims) - {"time"}:
                x = x.mean(dim=dim, keep_attrs=True)
            newT = x.astype("f8")  # prevent X⁴ precision loss
            predictors.append(newT)
            #
            # Convert Temperatures to radiance
            #
            rad = self.hirs.srfs[ch - 1].blackbody_radiance(
                ureg.Quantity(newT.values, ureg.K))
            # radiances for temperatures above 340 are flagged as NaN
            too_warm = (newT > 340.)
            if numpy.sum(too_warm) > 0:
                rad[too_warm] = numpy.nan
            row = numpy.zeros((1, len(rad)), dtype=numpy.float32)
            row[0, :] = rad
            rows.append(row)
        Tname = "temperature_{:s}".format(self.temperatures[0])
        radiances = UADA(
            numpy.concatenate(rows, axis=0),
            dims=['ntemp', 'time'],
            coords={'time': source[Tname].coords['time'].values,
                    'ntemp': numpy.arange(len(self.temperatures))},
            attrs={"units": str(rad_u["si"])})
        return predictors, radiances

    #
    # Get the temperatures to estimate uncertainty
    #
    # Make sure temperatures are in sync with space counts etc.
    # so ensures both space and iwct observations available
    #
    views_space = xarray.DataArray(
        ds["scantype"].values == self.hirs.typ_space,
        coords=ds["scantype"].coords)
    views_iwct = xarray.DataArray(
        ds["scantype"].values == self.hirs.typ_iwt,
        coords=ds["scantype"].coords)
    dsi = self.hirs.dist_space_iwct
    space_followed_by_iwct = (views_space[:-dsi].variable &
                              views_iwct[dsi:].variable)
    ds_temp = ds.isel(time=slice(None, -dsi)).isel(
        time=space_followed_by_iwct)

    # Radiances for every line of ds, and for the calibration-synchronised
    # subset.  Only the predictors of the subset are returned to the
    # caller (needed as predictors for T_outlier routines).
    _, R = temperature_radiances(ds)
    predictors, R_space = temperature_radiances(ds_temp)
    X = xarray.merge(predictors)

    #
    # Get where different views are
    #
    views_Earth = xarray.DataArray(
        ds["scantype"].values == tovs.HIRS.typ_Earth,
        coords=ds["scantype"].coords)

    #
    # Get counts
    #
    counts_space, _ = self.hirs.extract_calibcounts(ds, ch)

    #
    # Get gain/offset/non-linear etc.
    #
    (cal_time, offset, slope, a2) = self.hirs.calculate_offset_and_slope(
        ds, ch, self.hirs.srfs[ch - 1])
    if not numpy.array_equal(numpy.isfinite(offset),
                             numpy.isfinite(slope)):
        raise ValueError("Expecting offset and slope to have same "
                         "finite values, but I got disappointed.")

    # Check to see if we have a non-linear term; fall back to a purely
    # linear model when a2 cannot be indexed.
    try:
        a_2 = a2.values[0]
    except (AttributeError, IndexError, KeyError, TypeError):
        a_2 = 0.

    #
    # Note use median here as Gerrit does - presumably for 'outlier' cases
    # sometimes seen in calibration views
    #
    slope_val = slope.median('scanpos').values

    #
    # Get Rself at calibration points based on previous gain
    #
    # NOTE(review): slope_val results from a median over 'scanpos' but is
    # indexed below with the column index of the space counts -- confirm
    # that these axes are meant to correspond.
    cs = counts_space.values
    Rself_cal = numpy.zeros((cs.shape[0] - 1, cs.shape[1]))
    for pos in range(cs.shape[1]):
        Rself_cal[:, pos] = (
            slope_val[pos] * (cs[1:, pos] - cs[0:-1, pos])
            + a_2 * (cs[1:, pos]**2 - cs[0:-1, pos]**2))

    #
    # Gerrits code seems to use the median values so we will as well
    #
    Rself_med = numpy.median(Rself_cal, axis=1)

    scanpos_time = ds["time"][views_Earth]
    Rearth = R.values[:, views_Earth]
    Rspace = R_space.values[:, :]

    #
    # Get combination of input Ts that mimic observed self
    # emission - used to estimate uncertainty, not Rself
    # itself
    # Note fit to delta radiances to radiances themselves
    #
    p_estRself, u_est, ok = self.estimate_Rself(Rspace, Rself_med)

    #
    # Loop round calibration cycle data to get Rself at earth view
    # locations
    #
    # Also
    # Estimate uncertainty from variation of other temperatures as
    # percentage from linear model and apply mean percentage to
    # Rself to get uncertainty
    #
    n_earth = scanpos_time.coords["time"].size
    Rself = numpy.full(n_earth, numpy.nan, dtype="f4")
    uRself = numpy.full(n_earth, numpy.nan, dtype="f4")

    earth_t = scanpos_time.values
    cal_t = cal_time.values
    for i in range(len(cal_t) - 1):
        gd = (earth_t >= cal_t[i]) & (earth_t <= cal_t[i + 1])
        if not gd.any():
            continue

        # Sawtooth: ramp linearly from zero at calibration i to
        # Rself_med[i] at calibration i+1.
        dtime = self.get_np_seconds(cal_t[i + 1] - cal_t[i])
        Rself_slope = Rself_med[i] / dtime
        dtime = self.get_np_seconds(earth_t[gd] - cal_t[i])
        Rself[gd] = Rself_slope * dtime

        if not ok:
            continue

        T = Rearth[:, gd]
        Tdata = numpy.zeros(T.shape[1])
        Tdata[:] = p_estRself[0]
        for j in range(len(p_estRself) - 1):
            Tdata = Tdata + p_estRself[j + 1] * (T[j, :] - Rspace[j, i])
        Tdata = Tdata - Tdata[0]

        # NOTE(review): the slope divides by the number of samples rather
        # than the number of intervals -- kept as in the original.
        n = Tdata.shape[0]
        U_slope = (Tdata[n - 1] - Tdata[0]) / n

        #
        # Uncertainty is deviation from a straight line; lines where
        # any temperature radiance is not finite get zero here.
        #
        all_finite = numpy.all(numpy.isfinite(T), axis=0)
        uRself[gd] = numpy.where(
            all_finite,
            numpy.abs(Tdata - U_slope * numpy.arange(n)),
            0.)

    #
    # Set minimum uncertainty at that from fit of data
    #
    gd = (numpy.isfinite(uRself) & (uRself < u_est))
    if gd.any():
        uRself[gd] = u_est

    # Fill all lines with u_est if no calculation done
    gd = (~numpy.isfinite(Rself) | ~numpy.isfinite(uRself))
    if gd.any():
        uRself[gd] = u_est

    Y_pred = UADA(Rself,
                  coords=scanpos_time.coords["time"].coords,
                  attrs={"units": str(rad_u["si"])})

    #
    # Force u_Rself to be a single value as this is what
    # Gerrits code expects unfortunately - subsequent code
    # seems hardwired to this assumption...
    #
    uRself = numpy.mean(uRself)

    rself_time = [earth_t[0], earth_t[-1]]

    return rself_time, X, Y_pred, uRself