Example #1
def interpolate_to_uniform_global_grid(data_in, lons_in, lats_in, out_dx=0.5):
    """
    Interpolate data to a regular, global latlon grid
    :param data_in:
    :param lons_in:
    :param lats_in:
    :param out_dx:
    :return:
    """
    x, y, z = lat_lon.lon_lat_to_cartesian(lons_in.flatten(),
                                           lats_in.flatten())
    tree = cKDTree(list(zip(x, y, z)))

    lons_out = np.arange(-180, 180, out_dx)
    lats_out = np.arange(-90, 90, out_dx)

    lats_out, lons_out = np.meshgrid(lats_out, lons_out)

    x_out, y_out, z_out = lat_lon.lon_lat_to_cartesian(lons_out.flatten(),
                                                       lats_out.flatten())

    dists, inds = tree.query(list(zip(x_out, y_out, z_out)))

    data_out = data_in.flatten()[inds].reshape(lons_out.shape)

    return lons_out, lats_out, data_out
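A minimal standalone driver for the function above. This is a sketch: it assumes the function is pasted into the same script, and it substitutes a stand-in for the repo's lat_lon.lon_lat_to_cartesian (assumed here to be the standard unit-sphere conversion):

import types

import numpy as np
from scipy.spatial import cKDTree


def _lon_lat_to_cartesian(lon, lat, r_earth=1.0):
    # standard spherical-to-Cartesian conversion on a sphere of radius r_earth
    lon_r, lat_r = np.radians(lon), np.radians(lat)
    return (r_earth * np.cos(lat_r) * np.cos(lon_r),
            r_earth * np.cos(lat_r) * np.sin(lon_r),
            r_earth * np.sin(lat_r))


# stand-in for the lat_lon module used throughout these examples
lat_lon = types.SimpleNamespace(lon_lat_to_cartesian=_lon_lat_to_cartesian)

# coarse 10-degree source grid carrying a smooth synthetic field
lons_in, lats_in = np.meshgrid(np.arange(-180, 180, 10.0), np.arange(-85, 90, 10.0))
data_in = np.cos(np.radians(lats_in)) * np.sin(np.radians(lons_in))

lons_out, lats_out, data_out = interpolate_to_uniform_global_grid(data_in, lons_in, lats_in, out_dx=0.5)
print(data_out.shape)  # (720, 360): lon-major, because of the meshgrid(lats_out, lons_out) call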
Example #2
    def __init__(self, lon1=180.0, lat1=0.0, lon2=180.0, lat2=0.0, **kwargs):
        """
        Basis vectors of the rotated coordinate system in the original coord system
        e1 = -p1/|p1|                   =>   row0
        e2 = -( p2 - (p1, p2) * p1) / |p2 - (p1, p2) * p1| #perpendicular to e1, and lies in
        the plane parallel to the plane (p1^p2)  => row1
        e3 = [p1,p2] / |[p1, p2]| , perpendicular to the plane (p1^p2)          => row2
        """

        print(kwargs)

        self.lon1 = lon1
        self.lon2 = lon2
        self.lat1 = lat1
        self.lat2 = lat2

        print(lon1, lat1, lon2, lat2)

        self.mean_earth_radius_m_crcm5 = 0.637122e7  # mean earth radius used in the CRCM5 model for area calculation

        p1 = lat_lon.lon_lat_to_cartesian(lon1, lat1, r_earth=1.0)
        p2 = lat_lon.lon_lat_to_cartesian(lon2, lat2, r_earth=1.0)

        p1 = np.array(p1)
        p2 = np.array(p2)

        cross_prod = np.cross(p1, p2)
        dot_prod = np.dot(p1, p2)

        row0 = -np.array(p1) / np.sqrt(np.dot(p1, p1))
        e2 = (dot_prod * p1 - p2)
        row1 = e2 / np.sqrt(np.dot(e2, e2))
        row2 = cross_prod / np.sqrt(np.dot(cross_prod, cross_prod))
        self.rot_matrix = np.matrix([row0, row1, row2])
        assert isinstance(self.rot_matrix, np.matrix)
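A quick standalone check, under the assumption that lon_lat_to_cartesian is the usual unit-sphere conversion, that the three rows really do form an orthonormal basis, i.e. that rot_matrix is a genuine rotation:

import numpy as np


def _ll2xyz(lon, lat):
    lon_r, lat_r = np.radians(lon), np.radians(lat)
    return np.array([np.cos(lat_r) * np.cos(lon_r), np.cos(lat_r) * np.sin(lon_r), np.sin(lat_r)])


lon1, lat1, lon2, lat2 = -97.0, 47.5, -7.0, 0.0    # arbitrary test points
p1, p2 = _ll2xyz(lon1, lat1), _ll2xyz(lon2, lat2)  # unit vectors, since r_earth=1

row0 = -p1 / np.sqrt(np.dot(p1, p1))
e2 = np.dot(p1, p2) * p1 - p2
row1 = e2 / np.sqrt(np.dot(e2, e2))
cp = np.cross(p1, p2)
row2 = cp / np.sqrt(np.dot(cp, cp))

rot = np.array([row0, row1, row2])
assert np.allclose(rot @ rot.T, np.eye(3))       # orthonormal rows => a valid rotation matrix
assert np.allclose(rot @ p1, [-1.0, 0.0, 0.0])   # p1 is mapped onto the negative x-axis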
Example #3
    def to_mask(self, lons_2d_grid, lats_2d_grid):
        """

        :param lons_2d_grid:
        :param lats_2d_grid:
        :return: the mask of the subregion corresponding to the grid with the upper right and lower left points from self
        """
        x_g, y_g, z_g = lat_lon.lon_lat_to_cartesian(lons_2d_grid.flatten(),
                                                     lats_2d_grid.flatten())
        ktree = KDTree(list(zip(x_g, y_g, z_g)))

        ll_x, ll_y, ll_z = lat_lon.lon_lat_to_cartesian(
            self.lleft_lon, self.lleft_lat)
        ur_x, ur_y, ur_z = lat_lon.lon_lat_to_cartesian(
            self.uright_lon, self.uright_lat)

        i_g, j_g = np.indices(lons_2d_grid.shape)

        i_g_flat, j_g_flat = i_g.flatten(), j_g.flatten()

        _, ind_ll = ktree.query((ll_x, ll_y, ll_z), k=1)
        _, ind_ur = ktree.query((ur_x, ur_y, ur_z), k=1)

        i_ll, j_ll = i_g_flat[ind_ll], j_g_flat[ind_ll]
        i_ur, j_ur = i_g_flat[ind_ur], j_g_flat[ind_ur]

        res = np.zeros_like(lons_2d_grid, dtype=bool)
        res[i_ll:i_ur + 1, j_ll:j_ur + 1] = 1

        return res, (i_ll, j_ll), (i_ur, j_ur)
Example #4
    def to_mask(self, lons_2d_grid, lats_2d_grid):
        """
        :param lons_2d_grid: 2d longitudes of the grid
        :param lats_2d_grid: 2d latitudes of the grid
        :return: the mask of the subregion of the grid delimited by the lower left and upper right
            points from self, along with the (i, j) indices of those two corner points
        """
        x_g, y_g, z_g = lat_lon.lon_lat_to_cartesian(lons_2d_grid.flatten(), lats_2d_grid.flatten())
        ktree = KDTree(list(zip(x_g, y_g, z_g)))

        ll_x, ll_y, ll_z = lat_lon.lon_lat_to_cartesian(self.lleft_lon, self.lleft_lat)
        ur_x, ur_y, ur_z = lat_lon.lon_lat_to_cartesian(self.uright_lon, self.uright_lat)

        i_g, j_g = np.indices(lons_2d_grid.shape)
        i_g_flat, j_g_flat = i_g.flatten(), j_g.flatten()

        _, ind_ll = ktree.query((ll_x, ll_y, ll_z), k=1)
        _, ind_ur = ktree.query((ur_x, ur_y, ur_z), k=1)

        i_ll, j_ll = i_g_flat[ind_ll], j_g_flat[ind_ll]
        i_ur, j_ur = i_g_flat[ind_ur], j_g_flat[ind_ur]

        res = np.zeros_like(lons_2d_grid, dtype=bool)
        res[i_ll:i_ur + 1, j_ll:j_ur + 1] = 1

        return res, (i_ll, j_ll), (i_ur, j_ur)
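The core trick of to_mask as a self-contained sketch: query the KDTree once per box corner, recover the (i, j) grid indices of the two nearest grid points, and slice between them. The grid and corner coordinates below are made up for illustration:

import numpy as np
from scipy.spatial import KDTree


def _ll2xyz(lon, lat):
    lon_r, lat_r = np.radians(lon), np.radians(lat)
    return np.cos(lat_r) * np.cos(lon_r), np.cos(lat_r) * np.sin(lon_r), np.sin(lat_r)


lons_g, lats_g = np.meshgrid(np.arange(-100.0, -60.0), np.arange(30.0, 60.0), indexing="ij")
ktree = KDTree(list(zip(*_ll2xyz(lons_g.flatten(), lats_g.flatten()))))
i_g_flat, j_g_flat = [a.flatten() for a in np.indices(lons_g.shape)]

_, ind_ll = ktree.query(_ll2xyz(-92.0, 40.0), k=1)  # lower left corner of the box
_, ind_ur = ktree.query(_ll2xyz(-75.0, 50.0), k=1)  # upper right corner of the box

mask = np.zeros_like(lons_g, dtype=bool)
mask[i_g_flat[ind_ll]:i_g_flat[ind_ur] + 1, j_g_flat[ind_ll]:j_g_flat[ind_ur] + 1] = True
print(mask.sum())  # 18 lon columns x 11 lat rows = 198 points inside the box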
Example #5
def get_ktree(ds: xarray.Dataset):
    lon, lat = ds["lon"].values, ds["lat"].values
    x, y, z = lat_lon.lon_lat_to_cartesian(lon.flatten(), lat.flatten())

    return KDTree(
        list(zip(x, y, z))
    )
Example #6
    def toGeographicLonLat(self, x, y):
        """
        Convert rotated coordinates back to geographic lon/lat
        (the inverse rotation is the transpose of rot_matrix)
        """
        p = lat_lon.lon_lat_to_cartesian(x, y, r_earth=1)
        p = self.rot_matrix.T * np.mat(p).T
        return lat_lon.cartesian_to_lon_lat(p.A1)
Example #7
    def get_seasonal_clim_interpolate_to(self,
                                         lons=None,
                                         lats=None,
                                         start_year=2002,
                                         end_year=2010,
                                         season_to_months: dict = None,
                                         vname: str = "sst"):
        """
        Calculate the climatology and then interpolate it to the given lon and lat fields
        :param lons:
        :param lats:
        :param start_year:
        :param end_year:
        :param season_to_months:
        :param vname:
        :return:
        """
        seasclim = self.get_seasonal_clim(start_year=start_year,
                                          end_year=end_year,
                                          season_to_months=season_to_months,
                                          vname=vname)

        xt, yt, zt = lat_lon.lon_lat_to_cartesian(lons.flatten(),
                                                  lats.flatten())

        inds = None
        seasclim_interpolated = OrderedDict()
        for sname, data in seasclim.items():

            if inds is None:
                lons_s, lats_s = data.coords["lon"][:], data.coords["lat"][:]

                print(data)

                lats_s, lons_s = np.meshgrid(lats_s, lons_s)
                xs, ys, zs = lat_lon.lon_lat_to_cartesian(
                    lons_s.flatten(), lats_s.flatten())

                ktree = KDTree(list(zip(xs, ys, zs)))

                dists, inds = ktree.query(list(zip(xt, yt, zt)))

            # transpose because the input field's layout is (t,z,lat, lon)
            seasclim_interpolated[sname] = data.values.T.flatten(
            )[inds].reshape(lons.shape)

        return seasclim_interpolated
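Why the transpose before flattening: np.meshgrid(lats_s, lons_s) yields lon-major 2d coordinate arrays, so the KDTree points are ordered longitude-first, while the source field is laid out (..., lat, lon). Transposing the field makes its flattened order match. A small sanity check:

import numpy as np

lats = np.array([10.0, 20.0])
lons = np.array([0.0, 90.0, 180.0])
lats_g, lons_g = np.meshgrid(lats, lons)         # both of shape (3, 2): lon varies along axis 0
field = np.zeros((lats.size, lons.size)) + lons  # (lat, lon) layout, each value = its longitude
assert field.shape == (2, 3)
assert np.allclose(field.T, lons_g)              # transposing aligns it with the meshgrid order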
Example #8
    def toProjectionXY(self, lon, lat):
        """
        Convert geographic lon/lat coordinates to the rotated lat lon coordinates
        """

        p = lat_lon.lon_lat_to_cartesian(lon, lat, r_earth=1)
        p = self.rot_matrix * np.mat(p).T
        return lat_lon.cartesian_to_lon_lat(p.A1)
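toProjectionXY and toGeographicLonLat (Example #6) are exact inverses, because the inverse of a rotation matrix is its transpose. A round-trip sketch, with inline stand-ins for the lat_lon conversions and the rotation built as in Example #2:

import numpy as np


def _ll2xyz(lon, lat):
    lon_r, lat_r = np.radians(lon), np.radians(lat)
    return np.array([np.cos(lat_r) * np.cos(lon_r), np.cos(lat_r) * np.sin(lon_r), np.sin(lat_r)])


def _xyz2ll(p):
    # inverse of the unit-sphere conversion (assumed equivalent to lat_lon.cartesian_to_lon_lat)
    return np.degrees(np.arctan2(p[1], p[0])), np.degrees(np.arcsin(p[2]))


p1, p2 = _ll2xyz(-97.0, 47.5), _ll2xyz(-7.0, 0.0)
e2 = np.dot(p1, p2) * p1 - p2
cp = np.cross(p1, p2)
rot = np.array([-p1, e2 / np.linalg.norm(e2), cp / np.linalg.norm(cp)])  # p1, p2 are already unit vectors

lon, lat = -84.0, 45.0
lon_r, lat_r = _xyz2ll(rot @ _ll2xyz(lon, lat))        # forward: toProjectionXY
lon_b, lat_b = _xyz2ll(rot.T @ _ll2xyz(lon_r, lat_r))  # back: toGeographicLonLat
assert np.allclose((lon_b, lat_b), (lon, lat))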
Example #9
def get_ktree(ds: xarray.Dataset):

    lonv = ds["lon"]

    if lonv.ndim == 3:
        # time-dependent 2d coordinates: take the first time step
        lon, lat = ds["lon"][0].values, ds["lat"][0].values
    else:
        lon, lat = ds["lon"].values, ds["lat"].values

    x, y, z = lat_lon.lon_lat_to_cartesian(lon.flatten(), lat.flatten())

    return KDTree(
        list(zip(x, y, z))
    )
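The reason every example builds its KDTree on 3d Cartesian coordinates instead of raw (lon, lat) pairs: Euclidean distance in lon/lat space breaks across the dateline, while chord distance on the sphere does not. A sketch of the failure mode:

import numpy as np
from scipy.spatial import KDTree


def _ll2xyz(lon, lat):
    lon_r, lat_r = np.radians(lon), np.radians(lat)
    return np.cos(lat_r) * np.cos(lon_r), np.cos(lat_r) * np.sin(lon_r), np.sin(lat_r)


lons_src = np.array([-179.5, 170.0, 0.0])
lats_src = np.zeros(3)

naive = KDTree(np.column_stack([lons_src, lats_src]))
_, i_naive = naive.query([179.5, 0.0])                    # 1: picks 170.0E, 9.5 degrees away

cart = KDTree(np.column_stack(_ll2xyz(lons_src, lats_src)))
_, i_cart = cart.query(_ll2xyz(179.5, 0.0))               # 0: picks 179.5W, only 1 degree away
print(i_naive, i_cart)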
Example #10
File: oisst.py Project: guziy/RPN
    def get_seasonal_clim_interpolate_to(self, lons=None, lats=None, start_year=2002, end_year=2010, season_to_months: dict = None, vname: str = "sst"):
        """
        Calculate the seasonal climatology and interpolate it to the given lon and lat fields
        :param lons: 2d target longitudes
        :param lats: 2d target latitudes
        :param start_year:
        :param end_year:
        :param season_to_months: maps season names to lists of month numbers
        :param vname: variable name in the source files
        :return: {season name: interpolated climatology field}
        """
        seasclim = self.get_seasonal_clim(start_year=start_year, end_year=end_year, season_to_months=season_to_months, vname=vname)

        xt, yt, zt = lat_lon.lon_lat_to_cartesian(lons.flatten(), lats.flatten())


        inds = None
        seasclim_interpolated = OrderedDict()
        for sname, data in seasclim.items():

            if inds is None:
                lons_s, lats_s = data.coords["lon"][:], data.coords["lat"][:]

                print(data)

                lats_s, lons_s = np.meshgrid(lats_s, lons_s)
                xs, ys, zs = lat_lon.lon_lat_to_cartesian(lons_s.flatten(), lats_s.flatten())

                ktree = KDTree(list(zip(xs, ys, zs)))

                dists, inds = ktree.query(list(zip(xt, yt, zt)))


            # transpose because the input field's layout is (t,z,lat, lon)
            seasclim_interpolated[sname] = data.values.T.flatten()[inds].reshape(lons.shape)

        return seasclim_interpolated
Example #11
File: commons.py Project: guziy/RPN
def interpolate_to_uniform_global_grid(data_in, lons_in, lats_in, out_dx=0.5):
    """
    Interpolate data to a regular, global latlon grid
    :param data_in:
    :param lons_in:
    :param lats_in:
    :param out_dx:
    :return:
    """
    x, y, z = lat_lon.lon_lat_to_cartesian(lons_in.flatten(), lats_in.flatten())
    tree = cKDTree(list(zip(x, y, z)))

    lons_out = np.arange(-180, 180, out_dx)
    lats_out = np.arange(-90, 90, out_dx)

    lats_out, lons_out = np.meshgrid(lats_out, lons_out)

    x_out, y_out, z_out = lat_lon.lon_lat_to_cartesian(lons_out.flatten(), lats_out.flatten())

    dists, inds = tree.query(list(zip(x_out, y_out, z_out)))

    data_out = data_in.flatten()[inds].reshape(lons_out.shape)

    return lons_out, lats_out, data_out
Example #12
def main(varname=""):
    plot_utils.apply_plot_params(width_cm=22, height_cm=5, font_size=8)
    # series = get_monthly_accumulations_area_avg(data_dir="/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_Obs_monthly_1980-2009",
    #                                             varname=varname)

    # series = get_monthly_accumulations_area_avg(data_dir="/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_NEMO_1980-2009_monthly",
    #                                             varname=varname)

    # series = get_monthly_accumulations_area_avg(data_dir="/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_HL_1980-2009_monthly",
    #                                             varname=varname)

    selected_months = [10, 11, 12, 1, 2, 3, 4, 5]

    data_root = common_params.data_root

    label_to_datapath = OrderedDict([
        # ("Obs", "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_Obs_monthly_1980-2009"),
        # ("Obs", "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_daily_Obs_monthly_icefix_1980-2009"),
        # (common_params.crcm_nemo_cur_label, data_root / "lake_effect_analysis_CRCM5_NEMO_CanESM2_RCP85_1989-2010_1989-2010" / "merged"),
        # (common_params.crcm_nemo_fut_label, data_root / "lake_effect_analysis_CRCM5_NEMO_CanESM2_RCP85_2079-2100_2079-2100" / "merged"),
        (common_params.crcm_nemo_cur_label, data_root /
         "lake_effect_analysis_CRCM5_NEMO_fix_CanESM2_RCP85_1989-2010_monthly_1989-2010"
         / "merged"),
        (common_params.crcm_nemo_fut_label, data_root /
         "lake_effect_analysis_CRCM5_NEMO_fix_CanESM2_RCP85_2079-2100_monthly_2079-2100"
         / "merged"),
    ])

    # longitudes and latitudes of the focus region around the Great Lakes (we define it mostly for
    # performance reasons and to eliminate regions with zero HLES that are still inside the 200 km HLES zone)
    focus_region_lonlat_nc_file = data_root / "lon_lat.nc"

    label_to_series = OrderedDict()
    label_to_color = {
        common_params.crcm_nemo_cur_label: "skyblue",
        common_params.crcm_nemo_fut_label: "salmon"
    }

    gl_mask = get_gl_mask(label_to_datapath[common_params.crcm_nemo_cur_label])
    hles_region_mask = get_mask_of_points_near_lakes(gl_mask,
                                                     npoints_radius=20)

    # select a file from the directory
    sel_file = None
    for f in label_to_datapath[common_params.crcm_nemo_cur_label].iterdir():
        if f.is_file():
            sel_file = f
            break

    assert sel_file is not None, f"Could not find any files in {label_to_datapath[common_params.crcm_nemo_cur_label]}"

    # Take into account the focus region
    with xarray.open_dataset(sel_file) as ds:
        hles_region_mask_lons, hles_region_mask_lats = [
            ds[k].values for k in ["lon", "lat"]
        ]

        with xarray.open_dataset(focus_region_lonlat_nc_file) as ds_focus:
            focus_lons, focus_lats = [
                ds_focus[k].values for k in ["lon", "lat"]
            ]

        coords_src = lat_lon.lon_lat_to_cartesian(
            hles_region_mask_lons.flatten(), hles_region_mask_lats.flatten())
        coords_dst = lat_lon.lon_lat_to_cartesian(focus_lons.flatten(),
                                                  focus_lats.flatten())

        ktree = KDTree(list(zip(*coords_src)))

        dists, inds = ktree.query(list(zip(*coords_dst)), k=1)

        focus_mask = hles_region_mask.flatten()
        focus_mask[...] = False
        focus_mask[inds] = True
        focus_mask.shape = hles_region_mask.shape

    for label, datapath in label_to_datapath.items():
        hles_file = None
        for f in datapath.iterdir():
            if f.name.endswith("_daily.nc"):
                hles_file = f
                break

        assert hles_file is not None, f"Could not find any HLES files in {datapath}"

        series = get_monthly_accumulations_area_avg_from_merged(
            data_file=hles_file,
            varname=varname,
            region_of_interest_mask=hles_region_mask & focus_mask)
        label_to_series[label] = series

    #  plotting
    gs = GridSpec(1, 2, wspace=0.05)

    fig = plt.figure()
    ax = fig.add_subplot(gs[0, 1])

    start_date = datetime(2001, 10, 1)

    # month starts from Oct 2001 through Oct 2002: 13 edges for 12 monthly bars
    dates = [
        start_date.replace(month=(start_date.month - 1 + i) % 12 + 1,
                           year=start_date.year + (start_date.month - 1 + i) // 12)
        for i in range(13)
    ]

    def format_month_label(x, pos):
        logging.debug(num2date(x))
        return "{:%b}".format(num2date(x))

    # calculate bar widths
    dates_num = date2num(dates)
    width = np.diff(dates_num) / (len(label_to_series) * 1.5)
    width = np.array([width[0] for _ in width])

    # select the months
    width = np.array(
        [w for w, d in zip(width, dates) if d.month in selected_months])
    dates = [d for d in dates[:-1] if d.month in selected_months]
    dates_num = date2num(dates)

    label_to_handle = OrderedDict()

    label_to_annual_hles = OrderedDict()

    for i, (label, series) in enumerate(label_to_series.items()):
        values = [series[d.month] * 100 for d in dates]

        # convert to percentages
        values_sum = sum(values)

        # save the total annual hles for later reuse
        label_to_annual_hles[label] = values_sum

        # values = [v / values_sum * 100 for v in values]

        logger.debug([label, values])
        logger.debug(f"sum(values) = {sum(values)}")

        h = ax.bar(dates_num + i * width,
                   values,
                   width=width,
                   align="edge",
                   linewidth=0.5,
                   edgecolor="k",
                   facecolor=label_to_color[label],
                   label=label,
                   zorder=10)
        label_to_handle[label] = h

    ax.set_ylabel("HLES (cm/day)")
    ax.set_title("(b) Monthly HLES distribution")

    ax.xaxis.set_major_formatter(FuncFormatter(func=format_month_label))
    ax.xaxis.set_major_locator(
        MonthLocator(bymonthday=int(sum(width[:len(label_to_series)]) / 2.) +
                     1))
    ax.spines['right'].set_visible(False)
    ax.spines['top'].set_visible(False)
    # ax.set_title(common_params.varname_to_display_name[varname])
    ax.yaxis.grid(True, linestyle="--", linewidth=0.5)
    # ax.text(1, 1, "(a)", fontdict=dict(weight="bold"), transform=ax.transAxes, va="top", ha="right")
    ax_with_legend = ax

    # area average annual total HLES
    text_align_props = dict(transform=ax.transAxes, va="bottom", ha="right")

    cur_hles_annual = label_to_annual_hles[common_params.crcm_nemo_cur_label]
    fut_hles_annual = label_to_annual_hles[common_params.crcm_nemo_fut_label]

    ax.text(
        1,
        0.2,
        r"$\Delta_{\rm total}$" +
        f"({(fut_hles_annual - cur_hles_annual) / cur_hles_annual * 100:.1f}%)",
        **text_align_props,
        fontdict=dict(size=6))

    # Plot the domain and the HLES region of interest
    ax = fig.add_subplot(gs[0, 0])
    topo_nc_file = data_root / "geophys_452x260_me.nc"
    ax = plot_domain_and_interest_region(
        ax,
        topo_nc_file,
        focus_region_lonlat_nc_file=focus_region_lonlat_nc_file)
    ax.set_title("(a) Experimental domain")

    # Add a common legend
    labels = list(label_to_handle)
    handles = [label_to_handle[l] for l in labels]
    ax_with_legend.legend(handles,
                          labels,
                          bbox_to_anchor=(0, -0.18),
                          loc="upper left",
                          borderaxespad=0.,
                          ncol=2)

    # ax.grid()
    sel_months_str = "_".join([str(m) for m in selected_months])

    common_params.img_folder.mkdir(exist_ok=True)
    img_file = common_params.img_folder / f"{varname}_histo_cc_m{sel_months_str}_domain.png"
    print(f"Saving plot to {img_file}")
    fig.savefig(img_file, **common_params.image_file_options)
Example #13
def main(in_dir="/RESCUE/skynet3_rech1/huziy/anusplin_links",
         out_dir="/HOME/huziy/skynet3_rech1/hail/anusplin_ts"):

    out_dir_p = Path(out_dir)

    in_dir_p = Path(in_dir)

    lon0 = -114.0708
    lat0 = 51.0486

    vname = "daily_precipitation_accumulation"
    vname_alternatives = ["daily_accumulation_precipitation"]
    vname_alternatives.append(vname)

    var_list = [vname]
    fname_hint = "pcp"

    spatial_ind = None

    varname_to_list_of_frames = {vname: [] for vname in var_list}

    for fin in in_dir_p.iterdir():

        if fin.name.lower().endswith("ret"):
            continue

        if fin.name.lower().endswith("verif"):
            continue

        if fname_hint not in fin.name.lower():
            continue

        if not fin.name.endswith(".nc"):
            continue

        print(fin)

        year, month = get_ym_from_path(fin)
        with Dataset(str(fin)) as ds:

            if spatial_ind is None:

                lons, lats = ds.variables["lon"][:], ds.variables["lat"][:]

                x, y, z = lat_lon.lon_lat_to_cartesian(lons.flatten(),
                                                       lats.flatten())

                ktree = KDTree(list(zip(x, y, z)))

                x0, y0, z0 = lat_lon.lon_lat_to_cartesian(lon0, lat0)

                dist, spatial_ind = ktree.query((x0, y0, z0))

            for vname_alt in vname_alternatives:
                try:
                    values = ds[vname_alt][:]
                    values = [field.flatten()[spatial_ind] for field in values]
                    break
                except IndexError:
                    # netCDF4 raises IndexError when the variable is absent: try the next name
                    continue
            else:
                raise KeyError(f"None of {vname_alternatives} found in {fin}")

            dates = [datetime(year, month, int(d)) for d in ds["time"][:]]

            varname_to_list_of_frames[vname].append(
                pd.DataFrame(index=dates, data=values))

    for vname in var_list:
        df = pd.concat(varname_to_list_of_frames[vname])

        assert isinstance(df, pd.DataFrame)

        df.sort_index(inplace=True)

        df.to_csv(str(out_dir_p.joinpath("{}.csv".format(vname))),
                  float_format="%.3f",
                  index_label="Time")
Example #14
def main():

    # target grid for interpolation
    nml_path = "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/GL_0.1_452x260_icefix_daymet/gemclim_settings.nml"
    target_grid_config = grid_config.gridconfig_from_gemclim_settings_file(nml_path)
    print(target_grid_config)

    target_lons, target_lats = target_grid_config.get_lons_and_lats_of_gridpoint_centers()
    xt, yt, zt = lat_lon.lon_lat_to_cartesian(target_lons.flatten(), target_lats.flatten())

    # the output folder
    out_folder = Path(nml_path).parent


    # Source data for precip and temperature: Daymet daily aggregated to 10km
    data_sources = {
        "PR": "/snow3/huziy/Daymet_daily_derivatives/daymet_spatial_agg_prcp_10x10/*.nc*",
        "TT": "/snow3/huziy/Daymet_daily_derivatives/daymet_spatial_agg_tavg_10x10/*.nc*"
    }

    vname_map = {
        "TT": "tavg", "PR": "prcp"
    }

    chunk_size = 1000

    for vname, data_path in data_sources.items():
        with xarray.open_mfdataset(data_path, data_vars="minimal") as ds:
            vname_daymet = vname_map[vname]
            arr = ds[vname_daymet]

            t = ds["time"]

            ktree = get_ktree(ds)

            d, sp_inds = ktree.query(list(zip(xt, yt, zt)), k=1)

            data_out = []

            nt = len(t)
            for start_index in range(0, nt, chunk_size):
                end_index = min(start_index + chunk_size - 1, nt - 1)
                chunk = end_index - start_index + 1

                arr_sel = arr[start_index:end_index + 1, :, :].to_masked_array()
                print(arr_sel.shape)

                data = arr_sel.reshape((chunk, -1))[:, sp_inds].reshape((chunk, ) + target_lons.shape)
                data_out.append(data)

            # ---
            data_out = np.concatenate(data_out, axis=0)


            ds_out = xarray.Dataset(
                data_vars={
                    vname: (["time", "x", "y"], data_out),
                    "lon": (["x", "y"], target_lons),
                    "lat": (["x", "y"], target_lats),
                },
                coords={"time": ("time", t.values)},
            )

            ds_out.to_netcdf(str(out_folder / f"{vname}.nc"))
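The chunked remapping above hinges on one fancy-indexing identity: reshaping a (t, ny, nx) block to (t, ny*nx) and gathering columns with the KDTree indices performs the nearest-neighbour lookup for every time step at once. A tiny check of that identity:

import numpy as np

chunk, ny, nx = 4, 3, 5
arr_sel = np.arange(chunk * ny * nx, dtype=float).reshape((chunk, ny, nx))
sp_inds = np.array([0, 7, 14, 14])  # flat source indices of 4 hypothetical target points

gathered = arr_sel.reshape((chunk, -1))[:, sp_inds]
manual = np.stack([arr_sel[t].flatten()[sp_inds] for t in range(chunk)])
assert np.allclose(gathered, manual)
print(gathered.shape)  # (4, 4): (time, target point), reshaped to the target grid afterwards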
Example #15
    def get_seasonal_means_with_ttest_stats_interpolated_to(
            self,
            lons_target,
            lats_target,
            season_to_monthperiod=None,
            start_year=-np.Inf,
            end_year=np.Inf,
            convert_monthly_accumulators_to_daily=False):
        """

        :param lons_target, lats_target: 2d arrays of target longitudes and latitudes
        :param season_to_monthperiod:
        :param start_year:
        :param end_year:
        :param convert_monthly_accumulators_to_daily: if true converts monthly accumulators to daily,
        :return dict(season: [mean, std, nobs])


        # coarsen the data and coordinates to the target scale and interpolate using nearest neighbours
        """

        target_scale_deg = (lons_target[1, 1] - lons_target[0, 0] +
                            lats_target[1, 1] - lats_target[0, 0]) / 2.0

        coarsening = int(target_scale_deg / self.characteristic_scale_deg +
                         0.5)
        print("source_scale: {}\ntarget_scale: {}\ncoarsening coefficient: {}".
              format(self.characteristic_scale_deg, target_scale_deg,
                     coarsening))

        def coarsening_func(x, axis=None):
            _mask = np.less(np.abs(x - self.missing_value), 1.0e-6)

            if np.all(_mask):
                return self.missing_value * np.ma.ones(
                    _mask.shape).mean(axis=axis)

            y = np.ma.masked_where(_mask, x)

            return y.mean(axis=axis)

        # aggregate the data
        trim_excess = True
        data = da.coarsen(coarsening_func,
                          self.data,
                          axes={
                              1: coarsening,
                              2: coarsening
                          },
                          trim_excess=trim_excess)
        lons_s = da.coarsen(np.mean,
                            da.from_array(self.lons, self.chunks[1:]),
                            axes={
                                0: coarsening,
                                1: coarsening
                            },
                            trim_excess=trim_excess).compute()
        lats_s = da.coarsen(np.mean,
                            da.from_array(self.lats, self.chunks[1:]),
                            axes={
                                0: coarsening,
                                1: coarsening
                            },
                            trim_excess=trim_excess).compute()

        source_grid = list(
            zip(*lat_lon.lon_lat_to_cartesian(lons_s.flatten(),
                                              lats_s.flatten())))
        print(np.shape(source_grid))
        ktree = KDTree(source_grid)

        dists, inds = ktree.query(
            list(
                zip(*lat_lon.lon_lat_to_cartesian(lons_target.flatten(),
                                                  lats_target.flatten()))))

        print("data.shape = ", data.shape)
        result, mask = self.__get_seasonal_means_with_ttest_stats_dask_lazy(
            data,
            season_to_monthperiod=season_to_monthperiod,
            start_year=start_year,
            end_year=end_year,
            convert_monthly_accumulators_to_daily=
            convert_monthly_accumulators_to_daily)

        # invoke the computations and interpolate the result
        for season in result:
            print("Computing for {}".format(season))
            for i in range(len(result[season]) - 1):

                result[season][i] = np.ma.masked_where(
                    mask, result[season][i].compute()).flatten()[inds].reshape(
                        lons_target.shape)

        return result
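da.coarsen applies the reduction over non-overlapping blocks whose per-axis size is given in axes; trim_excess=True drops edge rows and columns that do not fill a complete block instead of raising. A minimal demonstration of the block mean used above:

import dask.array as da
import numpy as np

x = da.from_array(np.arange(30, dtype=float).reshape(5, 6), chunks=(5, 6))
# 2x2 block means; the fifth row does not fill a block and is trimmed away
y = da.coarsen(np.mean, x, axes={0: 2, 1: 2}, trim_excess=True).compute()
print(y.shape)   # (2, 3)
print(y[0, 0])   # mean of [[0, 1], [6, 7]] = 3.5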
Example #16
def main():
    start_year = 1980
    end_year = 2009

    HL_LABEL = "CRCM5_HL"
    NEMO_LABEL = "CRCM5_NEMO"

    # significance level for the ttest (p_crit = 1 disables masking of insignificant differences)
    p_crit = 1

    vars_of_interest = [
        # T_AIR_2M,
        # TOTAL_PREC,
        # SWE,
        default_varname_mappings.LATENT_HF,
        default_varname_mappings.SENSIBLE_HF,
        default_varname_mappings.LWRAD_DOWN,
        default_varname_mappings.SWRAD_DOWN
        #       LAKE_ICE_FRACTION
    ]

    coastline_width = 0.3

    vname_to_seasonmonths_map = {
        SWE: OrderedDict([("November", [11]),
                          ("December", [12]),
                          ("January", [1, ])]),
        LAKE_ICE_FRACTION: OrderedDict([
            ("December", [12]),
            ("January", [1, ]),
            ("February", [2, ]),
            ("March", [3, ]),
            ("April", [4, ])]),
        T_AIR_2M: season_to_months,
        TOTAL_PREC: season_to_months,
    }


    # set season to months mappings
    for vname in vars_of_interest:
        if vname not in vname_to_seasonmonths_map:
            vname_to_seasonmonths_map[vname] = season_to_months


    sim_configs = {
        HL_LABEL: RunConfig(data_path="/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected",
                            start_year=start_year, end_year=end_year, label=HL_LABEL),

        NEMO_LABEL: RunConfig(data_path="/RECH2/huziy/coupling/coupled-GL-NEMO1h_30min/selected_fields",
                              start_year=start_year, end_year=end_year, label=NEMO_LABEL),
    }

    sim_labels = [HL_LABEL, NEMO_LABEL]

    vname_to_level = {
        T_AIR_2M: VerticalLevel(1, level_kinds.HYBRID),
        U_WE: VerticalLevel(1, level_kinds.HYBRID),
        V_SN: VerticalLevel(1, level_kinds.HYBRID),
        default_varname_mappings.LATENT_HF: VerticalLevel(5, level_kinds.ARBITRARY),
        default_varname_mappings.SENSIBLE_HF: VerticalLevel(5, level_kinds.ARBITRARY),
    }

    # Try to get the land_fraction for masking if necessary
    land_fraction = None
    try:
        first_ts_file = Path(sim_configs[HL_LABEL].data_path).parent / "pm1979010100_00000000p"
        land_fraction = get_land_fraction(first_timestep_file=first_ts_file)
    except Exception as err:
        # fall back to no land-fraction masking if the file cannot be read
        print(f"Could not get land fraction: {err}")

    # Calculations

    # prepare params for interpolation
    lons_t, lats_t, bsmap = get_target_lons_lats_basemap(sim_configs[HL_LABEL])

    # get a subdomain of the simulation domain
    nx, ny = lons_t.shape
    iss = IndexSubspace(i_start=20, j_start=10, i_end=int(nx / 1.5), j_end=int(ny / 1.8))
    # just to change basemap limits
    lons_t, lats_t, bsmap = get_target_lons_lats_basemap(sim_configs[HL_LABEL], sub_space=iss)

    xt, yt, zt = lat_lon.lon_lat_to_cartesian(lons_t.flatten(), lats_t.flatten())

    vname_map = {}
    vname_map.update(default_varname_mappings.vname_map_CRCM5)

    # Read and calculate simulated seasonal means
    mod_label_to_vname_to_season_to_std = {}
    mod_label_to_vname_to_season_to_nobs = {}

    sim_data = defaultdict(dict)
    for label, r_config in sim_configs.items():

        store_config = {
            "base_folder": r_config.data_path,
            "data_source_type": data_source_types.SAMPLES_FOLDER_FROM_CRCM_OUTPUT_VNAME_IN_FNAME,
            "varname_mapping": vname_map,
            "level_mapping": vname_to_level,
            "offset_mapping": default_varname_mappings.vname_to_offset_CRCM5,
            "multiplier_mapping": default_varname_mappings.vname_to_multiplier_CRCM5,
        }

        dm = DataManager(store_config=store_config)

        mod_label_to_vname_to_season_to_std[label] = {}
        mod_label_to_vname_to_season_to_nobs[label] = {}

        interp_indices = None
        for vname in vars_of_interest:

            # --
            end_year_for_current_var = end_year
            if vname == SWE:
                end_year_for_current_var = min(1996, end_year)

            # --
            seas_to_year_to_mean = dm.get_seasonal_means(varname_internal=vname,
                                                         start_year=start_year,
                                                         end_year=end_year_for_current_var,
                                                         season_to_months=vname_to_seasonmonths_map[vname])

            # get the climatology
            seas_to_clim = {seas: np.array(list(y_to_means.values())).mean(axis=0) for seas, y_to_means in
                            seas_to_year_to_mean.items()}

            sim_data[label][vname] = seas_to_clim

            if interp_indices is None:
                _, interp_indices = dm.get_kdtree().query(list(zip(xt, yt, zt)))

            season_to_std = {}
            mod_label_to_vname_to_season_to_std[label][vname] = season_to_std

            season_to_nobs = {}
            mod_label_to_vname_to_season_to_nobs[label][vname] = season_to_nobs

            for season in seas_to_clim:
                interpolated_field = seas_to_clim[season].flatten()[interp_indices].reshape(lons_t.shape)
                seas_to_clim[season] = interpolated_field

                # calculate standard deviations of the interpolated fields
                season_to_std[season] = np.asarray([field.flatten()[interp_indices].reshape(lons_t.shape) for field in
                                                    seas_to_year_to_mean[season].values()]).std(axis=0)

                # calculate numobs for the ttest
                season_to_nobs[season] = np.ones_like(lons_t) * len(seas_to_year_to_mean[season])

    # Plotting: interpolate to the same grid and plot obs and biases
    xx, yy = bsmap(lons_t, lats_t)
    lons_t[lons_t > 180] -= 360


    for vname in vars_of_interest:

        field_mask = maskoceans(lons_t, lats_t, np.zeros_like(lons_t), inlands=vname in [SWE]).mask
        field_mask_lakes = maskoceans(lons_t, lats_t, np.zeros_like(lons_t), inlands=True).mask

        plot_utils.apply_plot_params(width_cm=11 * len(vname_to_seasonmonths_map[vname]), height_cm=20, font_size=8)

        fig = plt.figure()

        nrows = len(sim_configs) + 1
        ncols = len(vname_to_seasonmonths_map[vname])
        gs = GridSpec(nrows=nrows, ncols=ncols)

        # plot the fields
        for current_row, sim_label in enumerate(sim_labels):
            for col, season in enumerate(vname_to_seasonmonths_map[vname]):

                field = sim_data[sim_label][vname][season]

                ax = fig.add_subplot(gs[current_row, col])

                if current_row == 0:
                    ax.set_title(season)

                clevs = get_clevs(vname)
                if clevs is not None:
                    bnorm = BoundaryNorm(clevs, len(clevs) - 1)
                    cmap = cm.get_cmap("viridis", len(clevs) - 1)
                else:
                    cmap = "viridis"
                    bnorm = None

                the_mask = field_mask_lakes if vname in [T_AIR_2M, TOTAL_PREC, SWE] else field_mask
                to_plot = np.ma.masked_where(the_mask, field) * internal_name_to_multiplier[vname]

                # temporary plot the actual values
                cs = bsmap.contourf(xx, yy, to_plot, ax=ax, levels=get_clevs(vname), cmap=cmap, norm=bnorm, extend="both")
                bsmap.drawcoastlines(linewidth=coastline_width)
                bsmap.colorbar(cs, ax=ax)

                if col == 0:
                    ax.set_ylabel("{}".format(sim_label))

        # plot differences between the fields
        for col, season in enumerate(vname_to_seasonmonths_map[vname]):

            field = sim_data[NEMO_LABEL][vname][season] - sim_data[HL_LABEL][vname][season]

            ax = fig.add_subplot(gs[-1, col])

            clevs = get_clevs(vname + "biasdiff")
            if clevs is not None:
                bnorm = BoundaryNorm(clevs, len(clevs) - 1)
                cmap = cm.get_cmap("bwr", len(clevs) - 1)
            else:
                cmap = "bwr"
                bnorm = None


            to_plot = field * internal_name_to_multiplier[vname]
            # to_plot = np.ma.masked_where(field_mask, field) * internal_name_to_multiplier[vname]

            # ttest
            a = sim_data[NEMO_LABEL][vname][season]  # seasonal climatology of the CRCM5_NEMO run
            std_a = mod_label_to_vname_to_season_to_std[NEMO_LABEL][vname][season]
            nobs_a = mod_label_to_vname_to_season_to_nobs[NEMO_LABEL][vname][season]

            b = sim_data[HL_LABEL][vname][season]  # seasonal climatology of the CRCM5_HL run
            std_b = mod_label_to_vname_to_season_to_std[HL_LABEL][vname][season]
            nobs_b = mod_label_to_vname_to_season_to_nobs[HL_LABEL][vname][season]


            t, p = ttest_ind_from_stats(mean1=a, std1=std_a, nobs1=nobs_a,
                                        mean2=b, std2=std_b, nobs2=nobs_b, equal_var=False)

            # Mask non-significant differences as given by the ttest
            to_plot = np.ma.masked_where(p > p_crit, to_plot)


            # mask the points with not sufficient land fraction
            if land_fraction is not None and vname in [SWE, ]:
                to_plot = np.ma.masked_where(land_fraction < 0.05, to_plot)


            # print("land fractions for large differences ", land_fraction[to_plot > 30])


            cs = bsmap.contourf(xx, yy, to_plot, ax=ax, extend="both", levels=get_clevs(vname + "biasdiff"), cmap=cmap, norm=bnorm)
            bsmap.drawcoastlines(linewidth=coastline_width)
            bsmap.colorbar(cs, ax=ax)

            if col == 0:
                ax.set_ylabel("{}\n-\n{}".format(NEMO_LABEL, HL_LABEL))


        fig.tight_layout()

        # save a figure per variable
        img_file = "seasonal_differences_noobs_{}_{}_{}-{}.png".format(vname,
                                                            "-".join([s for s in vname_to_seasonmonths_map[vname]]),
                                                            start_year, end_year)
        img_file = img_folder.joinpath(img_file)

        fig.savefig(str(img_file), dpi=300)

        plt.close(fig)
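scipy's ttest_ind_from_stats, used above, is vectorized over arrays, so a single call produces a p-value map from per-season mean, standard deviation and sample-size fields (Welch's test with equal_var=False). A toy version of the masking step:

import numpy as np
from scipy.stats import ttest_ind_from_stats

p_crit = 0.05
mean1, mean2 = np.array([1.0, 1.0]), np.array([1.2, 3.0])
std1 = std2 = np.array([0.5, 0.5])
nobs1 = nobs2 = np.array([30, 30])

t, p = ttest_ind_from_stats(mean1=mean1, std1=std1, nobs1=nobs1,
                            mean2=mean2, std2=std2, nobs2=nobs2, equal_var=False)
diff = np.ma.masked_where(p > p_crit, mean2 - mean1)
print(diff)  # [-- 2.0]: only the second difference is statistically significant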
Example #17
def main(in_dir="/RESCUE/skynet3_rech1/huziy/anusplin_links", out_dir="/HOME/huziy/skynet3_rech1/hail/anusplin_ts"):

    out_dir_p = Path(out_dir)

    in_dir_p = Path(in_dir)

    lon0 = -114.0708
    lat0 = 51.0486


    vname = "daily_precipitation_accumulation"
    vname_alternatives = ["daily_accumulation_precipitation"]
    vname_alternatives.append(vname)

    var_list = [vname]
    fname_hint = "pcp"

    spatial_ind = None


    varname_to_list_of_frames = {vname: [] for vname in var_list}


    for fin in in_dir_p.iterdir():

        if fin.name.lower().endswith("ret"):
            continue

        if fin.name.lower().endswith("verif"):
            continue


        if fname_hint not in fin.name.lower():
            continue

        if not fin.name.endswith(".nc"):
            continue


        print(fin)

        year, month = get_ym_from_path(fin)
        with Dataset(str(fin)) as ds:

            if spatial_ind is None:

                lons, lats = ds.variables["lon"][:], ds.variables["lat"][:]

                x, y, z = lat_lon.lon_lat_to_cartesian(lons.flatten(), lats.flatten())

                ktree = KDTree(list(zip(x, y, z)))

                x0, y0, z0 = lat_lon.lon_lat_to_cartesian(lon0, lat0)

                dist, spatial_ind = ktree.query((x0, y0, z0))

            for vname_alt in vname_alternatives:
                try:
                    values = ds[vname_alt][:]
                    values = [field.flatten()[spatial_ind] for field in values]
                    break
                except IndexError:
                    # netCDF4 raises IndexError when the variable is absent: try the next name
                    continue
            else:
                raise KeyError(f"None of {vname_alternatives} found in {fin}")


            dates = [datetime(year, month, int(d)) for d in ds["time"][:]]


            varname_to_list_of_frames[vname].append(pd.DataFrame(index=dates, data=values))

    for vname in var_list:
        df = pd.concat(varname_to_list_of_frames[vname])

        assert isinstance(df, pd.DataFrame)

        df.sort_index(inplace=True)

        df.to_csv(str(out_dir_p.joinpath("{}.csv".format(vname))), float_format="%.3f", index_label="Time")
Example #18
def main():

    # target grid for interpolation
    nml_path = "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/GL_0.1_452x260_icefix_daymet/gemclim_settings.nml"
    target_grid_config = grid_config.gridconfig_from_gemclim_settings_file(nml_path)
    print(target_grid_config)

    target_lons, target_lats = target_grid_config.get_lons_and_lats_of_gridpoint_centers()
    xt, yt, zt = lat_lon.lon_lat_to_cartesian(target_lons.flatten(),
                                              target_lats.flatten())

    # the output folder
    out_folder = Path(nml_path).parent

    # Source data for precip and temperature: Daymet daily aggregated to 10km
    data_sources = {
        "PR": "/snow3/huziy/Daymet_daily_derivatives/daymet_spatial_agg_prcp_10x10/*.nc*",
        "TT": "/snow3/huziy/Daymet_daily_derivatives/daymet_spatial_agg_tavg_10x10/*.nc*"
    }

    vname_map = {"TT": "tavg", "PR": "prcp"}

    chunk_size = 1000

    for vname, data_path in data_sources.items():
        with xarray.open_mfdataset(data_path, data_vars="minimal") as ds:
            vname_daymet = vname_map[vname]
            arr = ds[vname_daymet]

            t = ds["time"]

            ktree = get_ktree(ds)

            d, sp_inds = ktree.query(list(zip(xt, yt, zt)), k=1)

            data_out = []

            nt = len(t)
            for start_index in range(0, nt, chunk_size):
                end_index = min(start_index + chunk_size - 1, nt - 1)
                chunk = end_index - start_index + 1

                arr_sel = arr[start_index:end_index + 1, :, :].to_masked_array()
                print(arr_sel.shape)

                data = arr_sel.reshape((chunk, -1))[:, sp_inds].reshape((chunk, ) + target_lons.shape)
                data_out.append(data)

            # ---
            data_out = np.concatenate(data_out, axis=0)

            ds_out = xarray.Dataset(
                data_vars={
                    vname: (["time", "x", "y"], data_out),
                    "lon": (["x", "y"], target_lons),
                    "lat": (["x", "y"], target_lats),
                },
                coords={"time": ("time", t.values)},
            )

            ds_out.to_netcdf(str(out_folder / f"{vname}.nc"))
Example #19
def main(varname=""):
    plot_utils.apply_plot_params(width_cm=22, height_cm=5, font_size=8)
    # series = get_monthly_accumulations_area_avg(data_dir="/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_Obs_monthly_1980-2009",
    #                                             varname=varname)

    # series = get_monthly_accumulations_area_avg(data_dir="/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_NEMO_1980-2009_monthly",
    #                                             varname=varname)

    # series = get_monthly_accumulations_area_avg(data_dir="/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_HL_1980-2009_monthly",
    #                                             varname=varname)

    hles_bin_edges = np.arange(0.1, 0.34, 0.02)

    # selected_months = [10, 11, 12, 1, 2, 3, 4, 5]
    selected_seasons = OrderedDict([("ND", [11, 12]), ("JF", [1, 2]),
                                    ("MA", [3, 4]),
                                    ("NDJFMA", [11, 12, 1, 2, 3, 4])])

    data_root = common_params.data_root

    label_to_datapath = OrderedDict([
        # ("Obs", "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_Obs_monthly_1980-2009"),
        # ("Obs", "/HOME/huziy/skynet3_rech1/Netbeans Projects/Python/RPN/lake_effect_analysis_daily_Obs_monthly_icefix_1980-2009"),
        # (common_params.crcm_nemo_cur_label, data_root / "lake_effect_analysis_CRCM5_NEMO_CanESM2_RCP85_1989-2010_1989-2010" / "merged"),
        # (common_params.crcm_nemo_fut_label, data_root / "lake_effect_analysis_CRCM5_NEMO_CanESM2_RCP85_2079-2100_2079-2100" / "merged"),
        (common_params.crcm_nemo_cur_label, data_root /
         "lake_effect_analysis_CRCM5_NEMO_fix_CanESM2_RCP85_1989-2010_monthly_1989-2010"
         / "merged"),
        (common_params.crcm_nemo_fut_label, data_root /
         "lake_effect_analysis_CRCM5_NEMO_fix_CanESM2_RCP85_2079-2100_monthly_2079-2100"
         / "merged"),
    ])

    # longitudes and latitudes of the focus region around the Great Lakes (we define it mostly for
    # performance reasons and to eliminate regions with zero HLES that are still inside the 200 km HLES zone)
    focus_region_lonlat_nc_file = data_root / "lon_lat.nc"

    label_to_series = OrderedDict()
    label_to_color = {
        common_params.crcm_nemo_cur_label: "skyblue",
        common_params.crcm_nemo_fut_label: "salmon"
    }

    gl_mask = get_gl_mask(label_to_datapath[common_params.crcm_nemo_cur_label])
    hles_region_mask = get_mask_of_points_near_lakes(gl_mask,
                                                     npoints_radius=20)

    # select a file from the directory
    sel_file = None
    for f in label_to_datapath[common_params.crcm_nemo_cur_label].iterdir():
        if f.is_file():
            sel_file = f
            break

    assert sel_file is not None, f"Could not find any files in {label_to_datapath[common_params.crcm_nemo_cur_label]}"

    # Take into account the focus region
    with xarray.open_dataset(sel_file) as ds:
        hles_region_mask_lons, hles_region_mask_lats = [
            ds[k].values for k in ["lon", "lat"]
        ]

        with xarray.open_dataset(focus_region_lonlat_nc_file) as ds_focus:
            focus_lons, focus_lats = [
                ds_focus[k].values for k in ["lon", "lat"]
            ]

        coords_src = lat_lon.lon_lat_to_cartesian(
            hles_region_mask_lons.flatten(), hles_region_mask_lats.flatten())
        coords_dst = lat_lon.lon_lat_to_cartesian(focus_lons.flatten(),
                                                  focus_lats.flatten())

        ktree = KDTree(list(zip(*coords_src)))

        dists, inds = ktree.query(list(zip(*coords_dst)), k=1)

        focus_mask = hles_region_mask.flatten()
        focus_mask[...] = False
        focus_mask[inds] = True
        focus_mask.shape = hles_region_mask.shape

    for seas_name, selected_months in selected_seasons.items():
        # read and calculate
        for label, datapath in label_to_datapath.items():
            hles_file = None

            # select hles file in the folder
            for f in datapath.iterdir():
                if f.name.endswith("_daily.nc"):
                    hles_file = f
                    break

            assert hles_file is not None, f"Could not find any HLES files in {datapath}"

            series = get_hles_amount_distribution_from_merged(
                data_file=hles_file,
                varname=varname,
                region_of_interest_mask=hles_region_mask & focus_mask,
                selected_months=selected_months,
                bin_edges=hles_bin_edges)
            label_to_series[label] = series

        #  plotting
        gs = GridSpec(1, 1, wspace=0.05)

        fig = plt.figure()
        ax = fig.add_subplot(gs[0, 0])

        # calculate bar widths
        widths = np.diff(hles_bin_edges)

        label_to_handle = OrderedDict()

        for i, (label, series) in enumerate(label_to_series.items()):
            values = series.values if hasattr(series, "values") else series

            # values = values / values.sum() * 100

            logger.debug([label, values])
            logger.debug(f"sum(values) = {sum(values)}")

            # h = ax.bar(hles_bin_edges[:-1] + i * widths / len(label_to_series), values, width=widths / len(label_to_series),
            #            align="edge", linewidth=0.5,
            #            edgecolor="k",
            #            facecolor=label_to_color[label], label=label, zorder=10)

            h = ax.plot(hles_bin_edges[:-1],
                        values,
                        color=label_to_color[label],
                        marker="o",
                        label=label,
                        markersize=2.5)
            # label_to_handle[label] = h

        ax.set_xlabel("HLES (m)")
        ax.set_title(f"HLES distribution, {seas_name}")

        ax.spines['right'].set_visible(False)
        ax.spines['top'].set_visible(False)
        # ax.set_title(common_params.varname_to_display_name[varname])
        ax.yaxis.grid(True, linestyle="--", linewidth=0.5)
        # ax.text(1, 1, "(a)", fontdict=dict(weight="bold"), transform=ax.transAxes, va="top", ha="right")
        ax_with_legend = ax
        ax.set_xlim((0.1, None))

        # area average annual total HLES
        text_align_props = dict(transform=ax.transAxes,
                                va="bottom",
                                ha="right")

        # Plot the domain and the HLES region of interest
        # ax = fig.add_subplot(gs[0, 0])
        # topo_nc_file = data_root / "geophys_452x260_me.nc"
        # ax = plot_domain_and_interest_region(ax, topo_nc_file, focus_region_lonlat_nc_file=focus_region_lonlat_nc_file)
        # ax.set_title("(a) Experimental domain")
        #
        # Add a common legend (handles are picked up from the labeled plot calls)
        ax_with_legend.legend(bbox_to_anchor=(0, -0.18),
                              loc="upper left",
                              borderaxespad=0.,
                              ncol=2)

        # ax.grid()
        sel_months_str = "_".join([str(m) for m in selected_months])

        common_params.img_folder.mkdir(exist_ok=True)
        img_file = common_params.img_folder / f"{varname}_histo_amount_cc_m{sel_months_str}_domain.png"
        print(f"Saving plot to {img_file}")
        fig.savefig(img_file, **common_params.image_file_options)
Example #20
def main(convert_mps_to_knots=True):

    # target grid for interpolation
    nml_path = "/HOME/huziy/skynet3_rech1/obs_data_for_HLES/interploated_to_the_same_grid/obs_anuspmaurer_narr/gemclim_settings.nml"
    target_grid_config = grid_config.gridconfig_from_gemclim_settings_file(nml_path)
    print(target_grid_config)

    target_lons, target_lats = target_grid_config.get_lons_and_lats_of_gridpoint_centers()
    xt, yt, zt = lat_lon.lon_lat_to_cartesian(target_lons.flatten(), target_lats.flatten())

    # the output folder
    out_folder = Path(nml_path).parent


    # Source data for wind: NARR
    data_sources = {
        "UU": "/RESCUE/skynet3_rech1/huziy/obs_data_for_HLES/initial_data/narr/uwnd.10m.*.nc",
        "VV": "/RESCUE/skynet3_rech1/huziy/obs_data_for_HLES/initial_data/narr/vwnd.10m.*.nc"
    }

    vname_map = {
        "UU": "uwnd", "VV": "vwnd"
    }

    chunk_size = 1000

    for vname, data_path in data_sources.items():
        with xarray.open_mfdataset(data_path) as ds:
            vname_daymet = vname_map[vname]
            arr = ds[vname_daymet]

            t = ds["time"]

            ktree = get_ktree(ds)

            d, sp_inds = ktree.query(list(zip(xt, yt, zt)), k=1)

            data_out = []

            nt = len(t)
            for start_index in range(0, nt, chunk_size):
                end_index = min(start_index + chunk_size - 1, nt - 1)
                chunk = end_index - start_index + 1

                arr_sel = arr[start_index:end_index + 1, :, :].to_masked_array()
                print(arr_sel.shape)

                data = arr_sel.reshape((chunk, -1))[:, sp_inds].reshape((chunk, ) + target_lons.shape)
                data_out.append(data)

            # ---
            data_out = np.concatenate(data_out, axis=0)

            if convert_mps_to_knots:
                # wind speed in m/s divided by (m/s per knot) gives knots
                data_out /= mps_per_knot

            ds_out = xarray.Dataset(
                data_vars={
                    vname: (["time", "x", "y"], data_out),
                },
                coords={"time": ("time", t.values),
                        "lon": (["x", "y"], target_lons),
                        "lat": (["x", "y"], target_lats),
                },
            )
            ds_out[vname].attrs["units"] = "knots"


            ds_out.to_netcdf(str(out_folder / f"{vname}.nc"))
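A quick sanity check of the unit conversion above, assuming mps_per_knot is the number of metres per second in one knot (about 0.514444, as its name suggests):

mps_per_knot = 0.514444  # 1 knot = 0.514444 m/s
wind_mps = 10.0
print(round(wind_mps / mps_per_knot, 2))  # 19.44 knots: dividing by m/s-per-knot converts m/s to knots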
Example #21
    def get_daily_percenile_fields_interpolated_to(
            self,
            lons_target,
            lats_target,
            start_year=-np.Inf,
            end_year=np.Inf,
            percentile=0.5,
            rolling_mean_window_days=None):
        """
        Coarsen to the target scale, compute daily percentile fields lazily, then interpolate
        each daily field to the target grid using nearest neighbours
        """
        target_scale_deg = (lons_target[1, 1] - lons_target[0, 0] +
                            lats_target[1, 1] - lats_target[0, 0]) / 2.0

        coarsening = int(target_scale_deg / self.characteristic_scale_deg +
                         0.5)
        print("source_scale: {}\ntarget_scale: {}\ncoarsening coefficient: {}".
              format(self.characteristic_scale_deg, target_scale_deg,
                     coarsening))

        def coarsening_func(x, axis=None):
            _mask = np.less(np.abs(x - self.missing_value), 1.0e-6)

            if np.all(_mask):
                return self.missing_value * np.ma.ones(
                    _mask.shape).mean(axis=axis)

            y = np.ma.masked_where(_mask, x)

            return y.mean(axis=axis)

        # aggregate the data
        trim_excess = True
        data = da.coarsen(coarsening_func,
                          self.data,
                          axes={
                              1: coarsening,
                              2: coarsening
                          },
                          trim_excess=trim_excess)
        lons_s = da.coarsen(np.mean,
                            da.from_array(self.lons, self.chunks[1:]),
                            axes={
                                0: coarsening,
                                1: coarsening
                            },
                            trim_excess=trim_excess).compute()
        lats_s = da.coarsen(np.mean,
                            da.from_array(self.lats, self.chunks[1:]),
                            axes={
                                0: coarsening,
                                1: coarsening
                            },
                            trim_excess=trim_excess).compute()

        source_grid = list(
            zip(*lat_lon.lon_lat_to_cartesian(lons_s.flatten(),
                                              lats_s.flatten())))
        print(np.shape(source_grid))
        ktree = KDTree(source_grid)

        dists, inds = ktree.query(
            list(
                zip(*lat_lon.lon_lat_to_cartesian(lons_target.flatten(),
                                                  lats_target.flatten()))))

        perc_daily, mask = self.get_daily_percenile_fields_lazy(
            data,
            start_year=start_year,
            end_year=end_year,
            percentile=percentile,
            rolling_mean_window_days=rolling_mean_window_days)

        print("perc_daily.shape=", perc_daily.shape)

        # do the interpolation for each day
        perc_daily_interpolated = []
        for perc_field in perc_daily:
            print(perc_field.shape)
            field = np.ma.masked_where(
                mask, perc_field.compute()).flatten()[inds].reshape(
                    lons_target.shape)
            perc_daily_interpolated.append(field)

        return np.array(perc_daily_interpolated)