def interpolate_to_uniform_global_grid(data_in, lons_in, lats_in, out_dx=0.5):
    """
    Interpolate data to a regular, global latlon grid using nearest neighbours

    :param data_in: 2d field on the source grid
    :param lons_in: 2d longitudes of the source grid
    :param lats_in: 2d latitudes of the source grid
    :param out_dx: resolution of the output grid, in degrees
    :return: (lons_out, lats_out, data_out) on the uniform global grid
    """
    x, y, z = lat_lon.lon_lat_to_cartesian(lons_in.flatten(), lats_in.flatten())
    tree = cKDTree(list(zip(x, y, z)))

    lons_out = np.arange(-180, 180, out_dx)
    lats_out = np.arange(-90, 90, out_dx)

    lats_out, lons_out = np.meshgrid(lats_out, lons_out)

    x_out, y_out, z_out = lat_lon.lon_lat_to_cartesian(lons_out.flatten(), lats_out.flatten())

    dists, inds = tree.query(list(zip(x_out, y_out, z_out)))

    data_out = data_in.flatten()[inds].reshape(lons_out.shape)

    return lons_out, lats_out, data_out
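# A minimal usage sketch of the function above (not from the original repo): it assumes
# the module's lat_lon helper and the scipy.spatial.cKDTree import are available, and
# fabricates a small curvilinear source grid to remap onto the 0.5 deg global grid.
import numpy as np

lats_src, lons_src = np.meshgrid(np.linspace(40, 60, 50), np.linspace(-100, -60, 80), indexing="ij")
data_src = np.cos(np.radians(lats_src)) * np.sin(np.radians(lons_src))

lons05, lats05, data05 = interpolate_to_uniform_global_grid(data_src, lons_src, lats_src, out_dx=0.5)
print(data05.shape)  # (720, 360): lon varies along axis 0 because of the meshgrid(lat, lon) call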
def __init__(self, lon1=180.0, lat1=0.0, lon2=180.0, lat2=0.0, **kwargs):
    """
    Basis vectors of the rotated coordinate system in the original coord system
        e1 = -p1 / |p1|                                                => row0
        e2 = -(p2 - (p1, p2) * p1) / |p2 - (p1, p2) * p1|              => row1
             (perpendicular to e1, lies in the plane spanned by p1 and p2)
        e3 = [p1, p2] / |[p1, p2]|, perpendicular to the plane (p1^p2) => row2
    """
    print(kwargs)
    self.lon1 = lon1
    self.lon2 = lon2
    self.lat1 = lat1
    self.lat2 = lat2

    print(lon1, lat1, lon2, lat2)

    # mean earth radius used in the CRCM5 model for area calculation
    self.mean_earth_radius_m_crcm5 = 0.637122e7

    p1 = lat_lon.lon_lat_to_cartesian(lon1, lat1, r_earth=1.0)
    p2 = lat_lon.lon_lat_to_cartesian(lon2, lat2, r_earth=1.0)

    p1 = np.array(p1)
    p2 = np.array(p2)

    cross_prod = np.cross(p1, p2)
    dot_prod = np.dot(p1, p2)

    row0 = -p1 / np.sqrt(np.dot(p1, p1))
    e2 = dot_prod * p1 - p2
    row1 = e2 / np.sqrt(np.dot(e2, e2))
    row2 = cross_prod / np.sqrt(np.dot(cross_prod, cross_prod))

    self.rot_matrix = np.matrix([row0, row1, row2])
    assert isinstance(self.rot_matrix, np.matrix)
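# The three rows of rot_matrix form an orthonormal basis (row1 is orthogonal to row0 by
# construction, row2 is the normalized cross product), so R @ R.T should be the identity.
# A hedged sanity check -- "RotatedLatLon" is a guess at the enclosing class name:
import numpy as np

rll = RotatedLatLon(lon1=-90.0, lat1=50.0, lon2=0.0, lat2=50.0)
R = np.asarray(rll.rot_matrix)
assert np.allclose(R @ R.T, np.eye(3), atol=1e-10)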
def to_mask(self, lons_2d_grid, lats_2d_grid):
    """
    :param lons_2d_grid: 2d longitudes of the grid
    :param lats_2d_grid: 2d latitudes of the grid
    :return: the mask of the subregion of the grid bounded by the lower left and upper right points from self
    """
    x_g, y_g, z_g = lat_lon.lon_lat_to_cartesian(lons_2d_grid.flatten(), lats_2d_grid.flatten())
    ktree = KDTree(list(zip(x_g, y_g, z_g)))

    ll_x, ll_y, ll_z = lat_lon.lon_lat_to_cartesian(self.lleft_lon, self.lleft_lat)
    ur_x, ur_y, ur_z = lat_lon.lon_lat_to_cartesian(self.uright_lon, self.uright_lat)

    i_g, j_g = np.indices(lons_2d_grid.shape)
    i_g_flat, j_g_flat = i_g.flatten(), j_g.flatten()

    _, ind_ll = ktree.query((ll_x, ll_y, ll_z), k=1)
    _, ind_ur = ktree.query((ur_x, ur_y, ur_z), k=1)

    i_ll, j_ll = i_g_flat[ind_ll], j_g_flat[ind_ll]
    i_ur, j_ur = i_g_flat[ind_ur], j_g_flat[ind_ur]

    res = np.zeros_like(lons_2d_grid, dtype=bool)
    res[i_ll:i_ur + 1, j_ll:j_ur + 1] = True

    return res, (i_ll, j_ll), (i_ur, j_ur)
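# A hedged usage sketch for to_mask above: the method only needs the four corner
# attributes (lleft_lon/lleft_lat, uright_lon/uright_lat) on its owner, so a
# SimpleNamespace stand-in is enough to demonstrate the crop. The binding trick assumes
# to_mask is accessible as a plain function, which may differ from the repo's class layout.
import numpy as np
from types import SimpleNamespace

region = SimpleNamespace(lleft_lon=-92.0, lleft_lat=41.0, uright_lon=-75.0, uright_lat=49.0)
region.to_mask = to_mask.__get__(region)  # bind the function above as a method of the stand-in

lats_2d, lons_2d = np.meshgrid(np.linspace(35, 55, 100), np.linspace(-100, -60, 160), indexing="ij")
mask, (i_ll, j_ll), (i_ur, j_ur) = region.to_mask(lons_2d, lats_2d)

field_sub = np.random.rand(*lons_2d.shape)[i_ll:i_ur + 1, j_ll:j_ur + 1]  # crop to the bounding box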
def get_ktree(ds: xarray.Dataset):
    lon, lat = ds["lon"].values, ds["lat"].values
    x, y, z = lat_lon.lon_lat_to_cartesian(lon.flatten(), lat.flatten())
    return KDTree(list(zip(x, y, z)))
def toGeographicLonLat(self, x, y):
    """
    Convert rotated lon/lat coordinates (x, y) back to geographic lon/lat
    (applies the transpose, i.e. the inverse, of the rotation matrix)
    """
    p = lat_lon.lon_lat_to_cartesian(x, y, r_earth=1)
    p = self.rot_matrix.T * np.mat(p).T
    return lat_lon.cartesian_to_lon_lat(p.A1)
def get_seasonal_clim_interpolate_to(self, lons=None, lats=None, start_year=2002, end_year=2010,
                                     season_to_months: dict = None, vname: str = "sst"):
    """
    Calculate the climatology and then interpolate it to the given lon and lat fields

    :param lons: 2d target longitudes
    :param lats: 2d target latitudes
    :param start_year: first year of the climatology
    :param end_year: last year of the climatology
    :param season_to_months: maps season names to lists of month numbers
    :param vname: variable name in the dataset
    :return: {season name: interpolated climatological field}
    """
    seasclim = self.get_seasonal_clim(start_year=start_year, end_year=end_year,
                                      season_to_months=season_to_months, vname=vname)

    xt, yt, zt = lat_lon.lon_lat_to_cartesian(lons.flatten(), lats.flatten())

    inds = None
    seasclim_interpolated = OrderedDict()
    for sname, data in seasclim.items():
        if inds is None:
            lons_s, lats_s = data.coords["lon"][:], data.coords["lat"][:]
            print(data)

            lats_s, lons_s = np.meshgrid(lats_s, lons_s)
            xs, ys, zs = lat_lon.lon_lat_to_cartesian(lons_s.flatten(), lats_s.flatten())
            ktree = KDTree(list(zip(xs, ys, zs)))
            dists, inds = ktree.query(list(zip(xt, yt, zt)))

        # transpose because the input field's layout is (t, z, lat, lon)
        seasclim_interpolated[sname] = data.values.T.flatten()[inds].reshape(lons.shape)

    return seasclim_interpolated
def toProjectionXY(self, lon, lat):
    """
    Convert geographic lon/lat coordinates to the rotated lat/lon coordinates
    """
    p = lat_lon.lon_lat_to_cartesian(lon, lat, r_earth=1)
    p = self.rot_matrix * np.mat(p).T
    return lat_lon.cartesian_to_lon_lat(p.A1)
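# toProjectionXY applies rot_matrix and toGeographicLonLat applies its transpose; since
# the matrix is orthogonal, the transpose is the inverse and the pair should round-trip.
# A hedged check, reusing the hypothetical rll instance from the rotation-matrix sketch above:
lon_g, lat_g = -84.0, 45.0
lon_r, lat_r = rll.toProjectionXY(lon_g, lat_g)      # geographic -> rotated
lon_b, lat_b = rll.toGeographicLonLat(lon_r, lat_r)  # rotated -> geographic

# compare longitudes modulo 360 in case the helper wraps them differently
assert abs(((lon_b - lon_g + 180.0) % 360.0) - 180.0) < 1e-9 and abs(lat_b - lat_g) < 1e-9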
def get_ktree(ds: xarray.Dataset):
    lonv = ds["lon"]

    if lonv.ndim == 3:
        # time-dependent 2d coordinates: use the first time step
        lon, lat = ds["lon"][0].values, ds["lat"][0].values
    else:
        lon, lat = ds["lon"].values, ds["lat"].values

    x, y, z = lat_lon.lon_lat_to_cartesian(lon.flatten(), lat.flatten())
    return KDTree(list(zip(x, y, z)))
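# Intended usage pattern for get_ktree (a sketch with a synthetic dataset, assuming the
# repo's lat_lon helper and scipy's KDTree as above): build the tree once, query it with
# Cartesian target coordinates, then map the flat indices back to 2d grid indices.
import numpy as np
import xarray

lats2d, lons2d = np.meshgrid(np.linspace(40, 50, 20), np.linspace(-90, -70, 30), indexing="ij")
ds = xarray.Dataset({"lon": (("y", "x"), lons2d), "lat": (("y", "x"), lats2d)})

ktree = get_ktree(ds)
xt, yt, zt = lat_lon.lon_lat_to_cartesian(np.array([-80.0]), np.array([45.0]))
dist, flat_ind = ktree.query(list(zip(xt, yt, zt)), k=1)
iy, ix = np.unravel_index(flat_ind, lons2d.shape)  # back to (row, col) on the source grid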
def main(varname=""):
    plot_utils.apply_plot_params(width_cm=22, height_cm=5, font_size=8)

    # series = get_monthly_accumulations_area_avg(data_dir="/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_Obs_monthly_1980-2009",
    #                                             varname=varname)
    # series = get_monthly_accumulations_area_avg(data_dir="/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_NEMO_1980-2009_monthly",
    #                                             varname=varname)
    # series = get_monthly_accumulations_area_avg(data_dir="/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_HL_1980-2009_monthly",
    #                                             varname=varname)

    selected_months = [10, 11, 12, 1, 2, 3, 4, 5]

    data_root = common_params.data_root
    label_to_datapath = OrderedDict([
        # ("Obs", "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_Obs_monthly_1980-2009"),
        # ("Obs", "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_daily_Obs_monthly_icefix_1980-2009"),
        # (common_params.crcm_nemo_cur_label, data_root / "lake_effect_analysis_CRCM5_NEMO_CanESM2_RCP85_1989-2010_1989-2010" / "merged"),
        # (common_params.crcm_nemo_fut_label, data_root / "lake_effect_analysis_CRCM5_NEMO_CanESM2_RCP85_2079-2100_2079-2100" / "merged"),
        (common_params.crcm_nemo_cur_label,
         data_root / "lake_effect_analysis_CRCM5_NEMO_fix_CanESM2_RCP85_1989-2010_monthly_1989-2010" / "merged"),
        (common_params.crcm_nemo_fut_label,
         data_root / "lake_effect_analysis_CRCM5_NEMO_fix_CanESM2_RCP85_2079-2100_monthly_2079-2100" / "merged"),
    ])

    # longitudes and latitudes of the focus region around the Great Lakes (defined mostly for
    # performance reasons and to eliminate regions with 0 HLES that are still within the 200 km HLES zone)
    focus_region_lonlat_nc_file = data_root / "lon_lat.nc"

    label_to_series = OrderedDict()
    label_to_color = {
        common_params.crcm_nemo_cur_label: "skyblue",
        common_params.crcm_nemo_fut_label: "salmon"
    }

    gl_mask = get_gl_mask(label_to_datapath[common_params.crcm_nemo_cur_label])
    hles_region_mask = get_mask_of_points_near_lakes(gl_mask, npoints_radius=20)

    # select a file from the directory
    sel_file = None
    for f in label_to_datapath[common_params.crcm_nemo_cur_label].iterdir():
        if f.is_file():
            sel_file = f
            break

    assert sel_file is not None, f"Could not find any files in {label_to_datapath[common_params.crcm_nemo_cur_label]}"

    # Take into account the focus region
    with xarray.open_dataset(sel_file) as ds:
        hles_region_mask_lons, hles_region_mask_lats = [ds[k].values for k in ["lon", "lat"]]

    with xarray.open_dataset(focus_region_lonlat_nc_file) as ds_focus:
        focus_lons, focus_lats = [ds_focus[k].values for k in ["lon", "lat"]]

    coords_src = lat_lon.lon_lat_to_cartesian(hles_region_mask_lons.flatten(),
                                              hles_region_mask_lats.flatten())
    coords_dst = lat_lon.lon_lat_to_cartesian(focus_lons.flatten(), focus_lats.flatten())

    ktree = KDTree(list(zip(*coords_src)))
    dists, inds = ktree.query(list(zip(*coords_dst)), k=1)

    focus_mask = hles_region_mask.flatten()
    focus_mask[...] = False
    focus_mask[inds] = True
    focus_mask.shape = hles_region_mask.shape

    for label, datapath in label_to_datapath.items():
        hles_file = None
        for f in datapath.iterdir():
            if f.name.endswith("_daily.nc"):
                hles_file = f
                break

        assert hles_file is not None, f"Could not find any HLES files in {datapath}"

        series = get_monthly_accumulations_area_avg_from_merged(
            data_file=hles_file, varname=varname,
            region_of_interest_mask=hles_region_mask & focus_mask)

        label_to_series[label] = series

    # plotting
    gs = GridSpec(1, 2, wspace=0.05)
    fig = plt.figure()
    ax = fig.add_subplot(gs[0, 1])

    start_date = datetime(2001, 10, 1)
    dates = [
        start_date.replace(month=(start_date.month + i) % 13 + int((start_date.month + i) % 13 == 0),
                           year=start_date.year + (start_date.month + i) // 13)
        for i in range(13)
    ]

    def format_month_label(x, pos):
        logging.debug(num2date(x))
        return "{:%b}".format(num2date(x))

    # calculate bar widths
    dates_num = date2num(dates)
    width = np.diff(dates_num) / (len(label_to_series) * 1.5)
    width = np.array([width[0] for _ in width])

    # select the months
    width = np.array([w for w, d in zip(width, dates) if d.month in selected_months])
    dates = [d for d in dates[:-1] if d.month in selected_months]
    dates_num = date2num(dates)

    label_to_handle = OrderedDict()
    label_to_annual_hles = OrderedDict()

    for i, (label, series) in enumerate(label_to_series.items()):
        values = [series[d.month] * 100 for d in dates]  # convert to percentages

        values_sum = sum(values)
        # save the total annual hles for later reuse
        label_to_annual_hles[label] = values_sum
        # values = [v / values_sum * 100 for v in values]

        logger.debug([label, values])
        logger.debug(f"sum(values) = {sum(values)}")

        h = ax.bar(dates_num + i * width, values, width=width, align="edge",
                   linewidth=0.5, edgecolor="k", facecolor=label_to_color[label],
                   label=label, zorder=10)

        label_to_handle[label] = h

    ax.set_ylabel("HLES (cm/day)")
    ax.set_title("(b) Monthly HLES distribution")
    ax.xaxis.set_major_formatter(FuncFormatter(func=format_month_label))
    ax.xaxis.set_major_locator(MonthLocator(bymonthday=int(sum(width[:len(label_to_series)]) / 2.) + 1))
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)

    # ax.set_title(common_params.varname_to_display_name[varname])
    ax.yaxis.grid(True, linestyle="--", linewidth=0.5)
    # ax.text(1, 1, "(a)", fontdict=dict(weight="bold"), transform=ax.transAxes, va="top", ha="right")

    ax_with_legend = ax

    # area average annual total HLES
    text_align_props = dict(transform=ax.transAxes, va="bottom", ha="right")
    cur_hles_annual = label_to_annual_hles[common_params.crcm_nemo_cur_label]
    fut_hles_annual = label_to_annual_hles[common_params.crcm_nemo_fut_label]

    ax.text(1, 0.2,
            r"$\Delta_{\rm total}$" + f"({(fut_hles_annual - cur_hles_annual) / cur_hles_annual * 100:.1f}%)",
            **text_align_props, fontdict=dict(size=6))

    # Plot the domain and the HLES region of interest
    ax = fig.add_subplot(gs[0, 0])
    topo_nc_file = data_root / "geophys_452x260_me.nc"
    ax = plot_domain_and_interest_region(ax, topo_nc_file,
                                         focus_region_lonlat_nc_file=focus_region_lonlat_nc_file)
    ax.set_title("(a) Experimental domain")

    # Add a common legend
    labels = list(label_to_handle)
    handles = [label_to_handle[l] for l in labels]
    ax_with_legend.legend(handles, labels, bbox_to_anchor=(0, -0.18), loc="upper left",
                          borderaxespad=0., ncol=2)

    # ax.grid()
    sel_months_str = "_".join([str(m) for m in selected_months])
    common_params.img_folder.mkdir(exist_ok=True)
    img_file = common_params.img_folder / f"{varname}_histo_cc_m{sel_months_str}_domain.png"
    print(f"Saving plot to {img_file}")
    fig.savefig(img_file, **common_params.image_file_options)
def main(in_dir="/RESCUE/skynet3_rech1/huziy/anusplin_links",
         out_dir="/HOME/huziy/skynet3_rech1/hail/anusplin_ts"):
    out_dir_p = Path(out_dir)
    in_dir_p = Path(in_dir)

    # point of interest
    lon0 = -114.0708
    lat0 = 51.0486

    vname = "daily_precipitation_accumulation"
    vname_alternatives = ["daily_accumulation_precipitation"]
    vname_alternatives.append(vname)

    var_list = [vname]
    fname_hint = "pcp"

    spatial_ind = None
    varname_to_list_of_frames = {vname: [] for vname in var_list}

    for fin in in_dir_p.iterdir():
        if fin.name.lower().endswith("ret"):
            continue

        if fin.name.lower().endswith("verif"):
            continue

        if fname_hint not in fin.name.lower():
            continue

        if not fin.name.endswith(".nc"):
            continue

        print(fin)
        year, month = get_ym_from_path(fin)

        with Dataset(str(fin)) as ds:
            if spatial_ind is None:
                lons, lats = ds.variables["lon"][:], ds.variables["lat"][:]
                x, y, z = lat_lon.lon_lat_to_cartesian(lons.flatten(), lats.flatten())
                ktree = KDTree(list(zip(x, y, z)))

                x0, y0, z0 = lat_lon.lon_lat_to_cartesian(lon0, lat0)
                dist, spatial_ind = ktree.query((x0, y0, z0))

            # reset for each file so stale values from the previous file cannot leak through
            values = None
            for vname_alt in vname_alternatives:
                try:
                    values = ds[vname_alt][:]
                    values = [field.flatten()[spatial_ind] for field in values]
                    break
                except IndexError:
                    pass

            dates = [datetime(year, month, int(d)) for d in ds["time"][:]]
            varname_to_list_of_frames[vname].append(pd.DataFrame(index=dates, data=values))

    for vname in var_list:
        df = pd.concat(varname_to_list_of_frames[vname])
        assert isinstance(df, pd.DataFrame)
        df.sort_index(inplace=True)
        df.to_csv(str(out_dir_p.joinpath("{}.csv".format(vname))),
                  float_format="%.3f", index_label="Time")
def main():
    # target grid for interpolation
    nml_path = "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/GL_0.1_452x260_icefix_daymet/gemclim_settings.nml"
    target_grid_config = grid_config.gridconfig_from_gemclim_settings_file(nml_path)
    print(target_grid_config)

    target_lons, target_lats = target_grid_config.get_lons_and_lats_of_gridpoint_centers()
    xt, yt, zt = lat_lon.lon_lat_to_cartesian(target_lons.flatten(), target_lats.flatten())

    # the output folder
    out_folder = Path(nml_path).parent

    # Source data for precip and temperature: Daymet daily aggregated to 10 km
    data_sources = {
        "PR": "/snow3/huziy/Daymet_daily_derivatives/daymet_spatial_agg_prcp_10x10/*.nc*",
        "TT": "/snow3/huziy/Daymet_daily_derivatives/daymet_spatial_agg_tavg_10x10/*.nc*"
    }

    vname_map = {
        "TT": "tavg",
        "PR": "prcp"
    }

    chunk_size = 1000

    for vname, data_path in data_sources.items():
        with xarray.open_mfdataset(data_path, data_vars="minimal") as ds:
            vname_daymet = vname_map[vname]
            arr = ds[vname_daymet]
            t = ds["time"]

            ktree = get_ktree(ds)
            d, sp_inds = ktree.query(list(zip(xt, yt, zt)), k=1)

            data_out = []
            nt = len(t)
            for start_index in range(0, nt, chunk_size):
                end_index = min(start_index + chunk_size - 1, nt - 1)
                chunk = end_index - start_index + 1

                arr_sel = arr[start_index:end_index + 1, :, :].to_masked_array()
                print(arr_sel.shape)

                data = arr_sel.reshape((chunk, -1))[:, sp_inds].reshape((chunk,) + target_lons.shape)
                data_out.append(data)

            # ---
            data_out = np.concatenate(data_out, axis=0)

            ds_out = xarray.Dataset(
                data_vars={
                    vname: (["time", "x", "y"], data_out),
                    "lon": (["x", "y"], target_lons),
                    "lat": (["x", "y"], target_lats),
                },
                coords={"time": ("time", t.values)},
            )
            ds_out.to_netcdf(str(out_folder / f"{vname}.nc"))
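# The chunked remapping step above is plain fancy indexing: flatten each time slice and
# gather it at the nearest-neighbour indices. A self-contained toy version of the same
# pattern (synthetic arrays, made-up dimensions, no I/O):
import numpy as np

nt_demo, ny_src, nx_src = 5, 4, 6
target_shape = (3, 2)

arr_demo = np.random.rand(nt_demo, ny_src, nx_src)
# stand-in for ktree.query() output: one flat source index per target cell
sp_inds_demo = np.random.randint(0, ny_src * nx_src, size=target_shape[0] * target_shape[1])

remapped = arr_demo.reshape((nt_demo, -1))[:, sp_inds_demo].reshape((nt_demo,) + target_shape)
print(remapped.shape)  # (5, 3, 2)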
def get_seasonal_means_with_ttest_stats_interpolated_to(self, lons_target, lats_target,
                                                        season_to_monthperiod=None,
                                                        start_year=-np.Inf, end_year=np.Inf,
                                                        convert_monthly_accumulators_to_daily=False):
    """
    :param lons_target, lats_target: 2d arrays of target longitudes and latitudes
    :param season_to_monthperiod:
    :param start_year:
    :param end_year:
    :param convert_monthly_accumulators_to_daily: if True, converts monthly accumulators to daily
    :return: dict(season: [mean, std, nobs])
    """
    # coarsen the data and coordinates to the target scale and interpolate using nearest neighbours
    target_scale_deg = (lons_target[1, 1] - lons_target[0, 0] + lats_target[1, 1] - lats_target[0, 0]) / 2.0
    coarsening = int(target_scale_deg / self.characteristic_scale_deg + 0.5)

    print("source_scale: {}\ntarget_scale: {}\ncoarsening coefficient: {}".format(
        self.characteristic_scale_deg, target_scale_deg, coarsening))

    def coarsening_func(x, axis=None):
        _mask = np.less(np.abs(x - self.missing_value), 1.0e-6)
        if np.all(_mask):
            return self.missing_value * np.ma.ones(_mask.shape).mean(axis=axis)

        y = np.ma.masked_where(_mask, x)
        return y.mean(axis=axis)

    # aggregate the data
    trim_excess = True
    data = da.coarsen(coarsening_func, self.data, axes={1: coarsening, 2: coarsening},
                      trim_excess=trim_excess)

    lons_s = da.coarsen(np.mean, da.from_array(self.lons, self.chunks[1:]),
                        axes={0: coarsening, 1: coarsening}, trim_excess=trim_excess).compute()
    lats_s = da.coarsen(np.mean, da.from_array(self.lats, self.chunks[1:]),
                        axes={0: coarsening, 1: coarsening}, trim_excess=trim_excess).compute()

    source_grid = list(zip(*lat_lon.lon_lat_to_cartesian(lons_s.flatten(), lats_s.flatten())))
    print(np.shape(source_grid))
    ktree = KDTree(source_grid)

    dists, inds = ktree.query(list(zip(*lat_lon.lon_lat_to_cartesian(lons_target.flatten(),
                                                                     lats_target.flatten()))))

    print("data.shape = ", data.shape)
    result, mask = self.__get_seasonal_means_with_ttest_stats_dask_lazy(
        data, season_to_monthperiod=season_to_monthperiod,
        start_year=start_year, end_year=end_year,
        convert_monthly_accumulators_to_daily=convert_monthly_accumulators_to_daily)

    # invoke the computations and interpolate the result
    for season in result:
        print("Computing for {}".format(season))
        for i in range(len(result[season]) - 1):
            result[season][i] = np.ma.masked_where(
                mask, result[season][i].compute()).flatten()[inds].reshape(lons_target.shape)

    return result
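# A minimal, self-contained illustration of the da.coarsen call used above (toy array,
# np.mean as the reduction); the custom coarsening_func additionally propagates the
# missing value when an entire block is missing.
import numpy as np
import dask.array as da

x = da.from_array(np.arange(24, dtype=float).reshape(4, 6), chunks=(2, 3))

# average over non-overlapping 2x3 blocks: shape (4, 6) -> (2, 2)
coarse = da.coarsen(np.mean, x, axes={0: 2, 1: 3}, trim_excess=True)
print(coarse.compute())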
def main():
    start_year = 1980
    end_year = 2009

    HL_LABEL = "CRCM5_HL"
    NEMO_LABEL = "CRCM5_NEMO"

    # critical p-value for the ttest, aka the significance level
    # (set to 1 here, so no differences are masked out)
    p_crit = 1

    vars_of_interest = [
        # T_AIR_2M,
        # TOTAL_PREC,
        # SWE,
        default_varname_mappings.LATENT_HF,
        default_varname_mappings.SENSIBLE_HF,
        default_varname_mappings.LWRAD_DOWN,
        default_varname_mappings.SWRAD_DOWN
        # LAKE_ICE_FRACTION
    ]

    coastline_width = 0.3

    vname_to_seasonmonths_map = {
        SWE: OrderedDict([("November", [11]), ("December", [12]), ("January", [1, ])]),
        LAKE_ICE_FRACTION: OrderedDict([
            ("December", [12]), ("January", [1, ]), ("February", [2, ]),
            ("March", [3, ]), ("April", [4, ])]),
        T_AIR_2M: season_to_months,
        TOTAL_PREC: season_to_months,
    }

    # set season to months mappings
    for vname in vars_of_interest:
        if vname not in vname_to_seasonmonths_map:
            vname_to_seasonmonths_map[vname] = season_to_months

    sim_configs = {
        HL_LABEL: RunConfig(data_path="/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected",
                            start_year=start_year, end_year=end_year, label=HL_LABEL),
        NEMO_LABEL: RunConfig(data_path="/RECH2/huziy/coupling/coupled-GL-NEMO1h_30min/selected_fields",
                              start_year=start_year, end_year=end_year, label=NEMO_LABEL),
    }

    sim_labels = [HL_LABEL, NEMO_LABEL]

    vname_to_level = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
        default_varname_mappings.LATENT_HF: VerticalLevel(5, level_kinds.ARBITRARY),
        default_varname_mappings.SENSIBLE_HF: VerticalLevel(5, level_kinds.ARBITRARY),
    }

    # Try to get the land_fraction for masking if necessary
    land_fraction = None
    try:
        first_ts_file = Path(sim_configs[HL_LABEL].data_path).parent / "pm1979010100_00000000p"
        land_fraction = get_land_fraction(first_timestep_file=first_ts_file)
    except Exception as err:
        raise err

    # Calculations

    # prepare params for interpolation
    lons_t, lats_t, bsmap = get_target_lons_lats_basemap(sim_configs[HL_LABEL])

    # get a subdomain of the simulation domain
    nx, ny = lons_t.shape
    iss = IndexSubspace(i_start=20, j_start=10, i_end=nx // 1.5, j_end=ny / 1.8)

    # just to change basemap limits
    lons_t, lats_t, bsmap = get_target_lons_lats_basemap(sim_configs[HL_LABEL], sub_space=iss)

    xt, yt, zt = lat_lon.lon_lat_to_cartesian(lons_t.flatten(), lats_t.flatten())

    vname_map = {}
    vname_map.update(default_varname_mappings.vname_map_CRCM5)

    # Read and calculate simulated seasonal means
    mod_label_to_vname_to_season_to_std = {}
    mod_label_to_vname_to_season_to_nobs = {}

    sim_data = defaultdict(dict)
    for label, r_config in sim_configs.items():
        store_config = {
            "base_folder": r_config.data_path,
            "data_source_type": data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME,
            "varname_mapping": vname_map,
            "level_mapping": vname_to_level,
            "offset_mapping": default_varname_mappings.vname_to_offset_CRCM5,
            "multiplier_mapping": default_varname_mappings.vname_to_multiplier_CRCM5,
        }

        dm = DataManager(store_config=store_config)

        mod_label_to_vname_to_season_to_std[label] = {}
        mod_label_to_vname_to_season_to_nobs[label] = {}

        interp_indices = None
        for vname in vars_of_interest:
            # --
            end_year_for_current_var = end_year
            if vname == SWE:
                end_year_for_current_var = min(1996, end_year)

            # --
            seas_to_year_to_mean = dm.get_seasonal_means(varname_internal=vname,
                                                         start_year=start_year,
                                                         end_year=end_year_for_current_var,
                                                         season_to_months=vname_to_seasonmonths_map[vname])

            # get the climatology
            seas_to_clim = {seas: np.array(list(y_to_means.values())).mean(axis=0)
                            for seas, y_to_means in seas_to_year_to_mean.items()}

            sim_data[label][vname] = seas_to_clim

            if interp_indices is None:
                _, interp_indices = dm.get_kdtree().query(list(zip(xt, yt, zt)))

            season_to_std = {}
            mod_label_to_vname_to_season_to_std[label][vname] = season_to_std

            season_to_nobs = {}
            mod_label_to_vname_to_season_to_nobs[label][vname] = season_to_nobs

            for season in seas_to_clim:
                interpolated_field = seas_to_clim[season].flatten()[interp_indices].reshape(lons_t.shape)
                seas_to_clim[season] = interpolated_field

                # calculate standard deviations of the interpolated fields
                season_to_std[season] = np.asarray(
                    [field.flatten()[interp_indices].reshape(lons_t.shape)
                     for field in seas_to_year_to_mean[season].values()]).std(axis=0)

                # calculate numobs for the ttest
                season_to_nobs[season] = np.ones_like(lons_t) * len(seas_to_year_to_mean[season])

    # Plotting: interpolate to the same grid and plot obs and biases
    xx, yy = bsmap(lons_t, lats_t)
    lons_t[lons_t > 180] -= 360

    for vname in vars_of_interest:

        field_mask = maskoceans(lons_t, lats_t, np.zeros_like(lons_t), inlands=vname in [SWE]).mask
        field_mask_lakes = maskoceans(lons_t, lats_t, np.zeros_like(lons_t), inlands=True).mask

        plot_utils.apply_plot_params(width_cm=11 * len(vname_to_seasonmonths_map[vname]),
                                     height_cm=20, font_size=8)

        fig = plt.figure()

        nrows = len(sim_configs) + 1
        ncols = len(vname_to_seasonmonths_map[vname])
        gs = GridSpec(nrows=nrows, ncols=ncols)

        # plot the fields
        for current_row, sim_label in enumerate(sim_labels):
            for col, season in enumerate(vname_to_seasonmonths_map[vname]):
                field = sim_data[sim_label][vname][season]

                ax = fig.add_subplot(gs[current_row, col])
                if current_row == 0:
                    ax.set_title(season)

                clevs = get_clevs(vname)
                if clevs is not None:
                    bnorm = BoundaryNorm(clevs, len(clevs) - 1)
                    cmap = cm.get_cmap("viridis", len(clevs) - 1)
                else:
                    cmap = "viridis"
                    bnorm = None

                the_mask = field_mask_lakes if vname in [T_AIR_2M, TOTAL_PREC, SWE] else field_mask
                to_plot = np.ma.masked_where(the_mask, field) * internal_name_to_multiplier[vname]

                # temporary: plot the actual values
                cs = bsmap.contourf(xx, yy, to_plot, ax=ax, levels=get_clevs(vname),
                                    cmap=cmap, norm=bnorm, extend="both")
                bsmap.drawcoastlines(linewidth=coastline_width)
                bsmap.colorbar(cs, ax=ax)

                if col == 0:
                    ax.set_ylabel("{}".format(sim_label))

        # plot differences between the fields
        for col, season in enumerate(vname_to_seasonmonths_map[vname]):
            field = sim_data[NEMO_LABEL][vname][season] - sim_data[HL_LABEL][vname][season]

            ax = fig.add_subplot(gs[-1, col])

            clevs = get_clevs(vname + "biasdiff")
            if clevs is not None:
                bnorm = BoundaryNorm(clevs, len(clevs) - 1)
                cmap = cm.get_cmap("bwr", len(clevs) - 1)
            else:
                cmap = "bwr"
                bnorm = None

            to_plot = field * internal_name_to_multiplier[vname]
            # to_plot = np.ma.masked_where(field_mask, field) * internal_name_to_multiplier[vname]

            # ttest
            a = sim_data[NEMO_LABEL][vname][season]  # Calculate the simulation data back from biases
            std_a = mod_label_to_vname_to_season_to_std[NEMO_LABEL][vname][season]
            nobs_a = mod_label_to_vname_to_season_to_nobs[NEMO_LABEL][vname][season]

            b = sim_data[HL_LABEL][vname][season]  # Calculate the simulation data back from biases
            std_b = mod_label_to_vname_to_season_to_std[HL_LABEL][vname][season]
            nobs_b = mod_label_to_vname_to_season_to_nobs[HL_LABEL][vname][season]

            t, p = ttest_ind_from_stats(mean1=a, std1=std_a, nobs1=nobs_a,
                                        mean2=b, std2=std_b, nobs2=nobs_b, equal_var=False)

            # Mask non-significant differences as given by the ttest
            to_plot = np.ma.masked_where(p > p_crit, to_plot)

            # mask the points with insufficient land fraction
            if land_fraction is not None and vname in [SWE, ]:
                to_plot = np.ma.masked_where(land_fraction < 0.05, to_plot)
                # print("land fractions for large differences ", land_fraction[to_plot > 30])

            cs = bsmap.contourf(xx, yy, to_plot, ax=ax, extend="both",
                                levels=get_clevs(vname + "biasdiff"), cmap=cmap, norm=bnorm)
            bsmap.drawcoastlines(linewidth=coastline_width)
            bsmap.colorbar(cs, ax=ax)

            if col == 0:
                ax.set_ylabel("{}\n-\n{}".format(NEMO_LABEL, HL_LABEL))

        fig.tight_layout()

        # save a figure per variable
        img_file = "seasonal_differences_noobs_{}_{}_{}-{}.png".format(
            vname, "-".join([s for s in vname_to_seasonmonths_map[vname]]), start_year, end_year)
        img_file = img_folder.joinpath(img_file)

        fig.savefig(str(img_file), dpi=300)
        plt.close(fig)
def main(varname=""):
    plot_utils.apply_plot_params(width_cm=22, height_cm=5, font_size=8)

    # series = get_monthly_accumulations_area_avg(data_dir="/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_Obs_monthly_1980-2009",
    #                                             varname=varname)
    # series = get_monthly_accumulations_area_avg(data_dir="/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_NEMO_1980-2009_monthly",
    #                                             varname=varname)
    # series = get_monthly_accumulations_area_avg(data_dir="/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_HL_1980-2009_monthly",
    #                                             varname=varname)

    hles_bin_edges = np.arange(0.1, 0.34, 0.02)

    # selected_months = [10, 11, 12, 1, 2, 3, 4, 5]
    selected_seasons = OrderedDict([
        ("ND", [11, 12]), ("JF", [1, 2]), ("MA", [3, 4]),
        ("NDJFMA", [11, 12, 1, 2, 3, 4])
    ])

    data_root = common_params.data_root
    label_to_datapath = OrderedDict([
        # ("Obs", "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_Obs_monthly_1980-2009"),
        # ("Obs", "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_daily_Obs_monthly_icefix_1980-2009"),
        # (common_params.crcm_nemo_cur_label, data_root / "lake_effect_analysis_CRCM5_NEMO_CanESM2_RCP85_1989-2010_1989-2010" / "merged"),
        # (common_params.crcm_nemo_fut_label, data_root / "lake_effect_analysis_CRCM5_NEMO_CanESM2_RCP85_2079-2100_2079-2100" / "merged"),
        (common_params.crcm_nemo_cur_label,
         data_root / "lake_effect_analysis_CRCM5_NEMO_fix_CanESM2_RCP85_1989-2010_monthly_1989-2010" / "merged"),
        (common_params.crcm_nemo_fut_label,
         data_root / "lake_effect_analysis_CRCM5_NEMO_fix_CanESM2_RCP85_2079-2100_monthly_2079-2100" / "merged"),
    ])

    # longitudes and latitudes of the focus region around the Great Lakes (defined mostly for
    # performance reasons and to eliminate regions with 0 HLES that are still within the 200 km HLES zone)
    focus_region_lonlat_nc_file = data_root / "lon_lat.nc"

    label_to_series = OrderedDict()
    label_to_color = {
        common_params.crcm_nemo_cur_label: "skyblue",
        common_params.crcm_nemo_fut_label: "salmon"
    }

    gl_mask = get_gl_mask(label_to_datapath[common_params.crcm_nemo_cur_label])
    hles_region_mask = get_mask_of_points_near_lakes(gl_mask, npoints_radius=20)

    # select a file from the directory
    sel_file = None
    for f in label_to_datapath[common_params.crcm_nemo_cur_label].iterdir():
        if f.is_file():
            sel_file = f
            break

    assert sel_file is not None, f"Could not find any files in {label_to_datapath[common_params.crcm_nemo_cur_label]}"

    # Take into account the focus region
    with xarray.open_dataset(sel_file) as ds:
        hles_region_mask_lons, hles_region_mask_lats = [ds[k].values for k in ["lon", "lat"]]

    with xarray.open_dataset(focus_region_lonlat_nc_file) as ds_focus:
        focus_lons, focus_lats = [ds_focus[k].values for k in ["lon", "lat"]]

    coords_src = lat_lon.lon_lat_to_cartesian(hles_region_mask_lons.flatten(),
                                              hles_region_mask_lats.flatten())
    coords_dst = lat_lon.lon_lat_to_cartesian(focus_lons.flatten(), focus_lats.flatten())

    ktree = KDTree(list(zip(*coords_src)))
    dists, inds = ktree.query(list(zip(*coords_dst)), k=1)

    focus_mask = hles_region_mask.flatten()
    focus_mask[...] = False
    focus_mask[inds] = True
    focus_mask.shape = hles_region_mask.shape

    for seas_name, selected_months in selected_seasons.items():

        # read and calculate
        for label, datapath in label_to_datapath.items():
            hles_file = None

            # select hles file in the folder
            for f in datapath.iterdir():
                if f.name.endswith("_daily.nc"):
                    hles_file = f
                    break

            assert hles_file is not None, f"Could not find any HLES files in {datapath}"

            series = get_hles_amount_distribution_from_merged(
                data_file=hles_file, varname=varname,
                region_of_interest_mask=hles_region_mask & focus_mask,
                selected_months=selected_months, bin_edges=hles_bin_edges)

            label_to_series[label] = series

        # plotting
        gs = GridSpec(1, 1, wspace=0.05)
        fig = plt.figure()
        ax = fig.add_subplot(gs[0, 0])

        # calculate bar widths
        widths = np.diff(hles_bin_edges)

        label_to_handle = OrderedDict()

        for i, (label, series) in enumerate(label_to_series.items()):
            values = series.values if hasattr(series, "values") else series
            # values = values / values.sum() * 100

            logger.debug([label, values])
            logger.debug(f"sum(values) = {sum(values)}")

            # h = ax.bar(hles_bin_edges[:-1] + i * widths / len(label_to_series), values,
            #            width=widths / len(label_to_series), align="edge", linewidth=0.5,
            #            edgecolor="k", facecolor=label_to_color[label], label=label, zorder=10)

            h = ax.plot(hles_bin_edges[:-1], values, color=label_to_color[label],
                        marker="o", label=label, markersize=2.5)

            # label_to_handle[label] = h

        ax.set_xlabel("HLES (m)")
        ax.set_title(f"HLES distribution, {seas_name}")
        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)

        # ax.set_title(common_params.varname_to_display_name[varname])
        ax.yaxis.grid(True, linestyle="--", linewidth=0.5)
        # ax.text(1, 1, "(a)", fontdict=dict(weight="bold"), transform=ax.transAxes, va="top", ha="right")

        ax_with_legend = ax
        ax.set_xlim((0.1, None))

        # area average annual total HLES
        text_align_props = dict(transform=ax.transAxes, va="bottom", ha="right")

        # Plot the domain and the HLES region of interest
        # ax = fig.add_subplot(gs[0, 0])
        # topo_nc_file = data_root / "geophys_452x260_me.nc"
        # ax = plot_domain_and_interest_region(ax, topo_nc_file, focus_region_lonlat_nc_file=focus_region_lonlat_nc_file)
        # ax.set_title("(a) Experimental domain")

        # Add a common legend
        labels = list(label_to_handle)
        handles = [label_to_handle[l] for l in labels]
        ax_with_legend.legend(bbox_to_anchor=(0, -0.18), loc="upper left",
                              borderaxespad=0., ncol=2)

        # ax.grid()
        sel_months_str = "_".join([str(m) for m in selected_months])
        common_params.img_folder.mkdir(exist_ok=True)
        img_file = common_params.img_folder / f"{varname}_histo_amount_cc_m{sel_months_str}_domain.png"
        print(f"Saving plot to {img_file}")
        fig.savefig(img_file, **common_params.image_file_options)
def main(convert_mps_to_knots=True):
    # target grid for interpolation
    nml_path = "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/obs_anuspmaurer_narr/gemclim_settings.nml"
    target_grid_config = grid_config.gridconfig_from_gemclim_settings_file(nml_path)
    print(target_grid_config)

    target_lons, target_lats = target_grid_config.get_lons_and_lats_of_gridpoint_centers()
    xt, yt, zt = lat_lon.lon_lat_to_cartesian(target_lons.flatten(), target_lats.flatten())

    # the output folder
    out_folder = Path(nml_path).parent

    # Source data for wind: NARR
    data_sources = {
        "UU": "/RESCUE/skynet3_rech1/huziy/obs_data_for_HLES/initial_data/narr/uwnd.10m.*.nc",
        "VV": "/RESCUE/skynet3_rech1/huziy/obs_data_for_HLES/initial_data/narr/vwnd.10m.*.nc"
    }

    vname_map = {
        "UU": "uwnd",
        "VV": "vwnd"
    }

    chunk_size = 1000

    for vname, data_path in data_sources.items():
        with xarray.open_mfdataset(data_path) as ds:
            vname_src = vname_map[vname]
            arr = ds[vname_src]
            t = ds["time"]

            ktree = get_ktree(ds)
            d, sp_inds = ktree.query(list(zip(xt, yt, zt)), k=1)

            data_out = []
            nt = len(t)
            for start_index in range(0, nt, chunk_size):
                end_index = min(start_index + chunk_size - 1, nt - 1)
                chunk = end_index - start_index + 1

                arr_sel = arr[start_index:end_index + 1, :, :].to_masked_array()
                print(arr_sel.shape)

                data = arr_sel.reshape((chunk, -1))[:, sp_inds].reshape((chunk,) + target_lons.shape)
                data_out.append(data)

            # ---
            data_out = np.concatenate(data_out, axis=0)

            if convert_mps_to_knots:
                data_out *= mps_per_knot

            ds_out = xarray.Dataset(
                data_vars={
                    vname: (["time", "x", "y"], data_out),
                },
                coords={
                    "time": ("time", t.values),
                    "lon": (["x", "y"], target_lons),
                    "lat": (["x", "y"], target_lats),
                },
            )
            ds_out[vname].attrs["units"] = "knots"
            ds_out.to_netcdf(str(out_folder / f"{vname}.nc"))
def get_daily_percenile_fields_interpolated_to(self, lons_target, lats_target,
                                               start_year=-np.Inf, end_year=np.Inf,
                                               percentile=0.5, rolling_mean_window_days=None):
    """
    Coarsen the source data and coordinates to the target scale, compute the daily
    percentile fields lazily, and interpolate them to the target grid using nearest neighbours
    """
    target_scale_deg = (lons_target[1, 1] - lons_target[0, 0] + lats_target[1, 1] - lats_target[0, 0]) / 2.0
    coarsening = int(target_scale_deg / self.characteristic_scale_deg + 0.5)

    print("source_scale: {}\ntarget_scale: {}\ncoarsening coefficient: {}".format(
        self.characteristic_scale_deg, target_scale_deg, coarsening))

    def coarsening_func(x, axis=None):
        _mask = np.less(np.abs(x - self.missing_value), 1.0e-6)
        if np.all(_mask):
            return self.missing_value * np.ma.ones(_mask.shape).mean(axis=axis)

        y = np.ma.masked_where(_mask, x)
        return y.mean(axis=axis)

    # aggregate the data
    trim_excess = True
    data = da.coarsen(coarsening_func, self.data, axes={1: coarsening, 2: coarsening},
                      trim_excess=trim_excess)

    lons_s = da.coarsen(np.mean, da.from_array(self.lons, self.chunks[1:]),
                        axes={0: coarsening, 1: coarsening}, trim_excess=trim_excess).compute()
    lats_s = da.coarsen(np.mean, da.from_array(self.lats, self.chunks[1:]),
                        axes={0: coarsening, 1: coarsening}, trim_excess=trim_excess).compute()

    source_grid = list(zip(*lat_lon.lon_lat_to_cartesian(lons_s.flatten(), lats_s.flatten())))
    print(np.shape(source_grid))
    ktree = KDTree(source_grid)

    dists, inds = ktree.query(list(zip(*lat_lon.lon_lat_to_cartesian(lons_target.flatten(),
                                                                     lats_target.flatten()))))

    perc_daily, mask = self.get_daily_percenile_fields_lazy(
        data, start_year=start_year, end_year=end_year, percentile=percentile,
        rolling_mean_window_days=rolling_mean_window_days)

    print("perc_daily.shape=", perc_daily.shape)

    # do the interpolation for each day
    perc_daily_interpolated = []
    for perc_field in perc_daily:
        print(perc_field.shape)
        field = np.ma.masked_where(mask, perc_field.compute()).flatten()[inds].reshape(lons_target.shape)
        perc_daily_interpolated.append(field)

    return np.array(perc_daily_interpolated)
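# A hedged call sketch (hypothetical names: `manager` stands for an instance of the class
# this method belongs to, and lons_target/lats_target are 2d target coordinate arrays):
p50 = manager.get_daily_percenile_fields_interpolated_to(
    lons_target, lats_target,
    start_year=1980, end_year=2009,
    percentile=0.5,                  # daily median
    rolling_mean_window_days=5)      # smooth the daily series before taking the percentile
print(p50.shape)  # expected: (number_of_days,) + lons_target.shape, per the per-day loop above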