Example #1
    def get_lake_fraction_for_date(self, the_date=None):

        """
        Get the lake ice cover fraction for the specified date
        :param the_date:
        :return:
        """
        if the_date not in self.cached_data:
            month_date = datetime(the_date.year, the_date.month, 1)

            self.cached_data = {}
            mr = MultiRPN(self.year_month_date_to_file_list[month_date])
            self.cached_data = mr.get_all_time_records_for_name_and_level(varname=self.varname, level=self.level,
                                                                          level_kind=self.level_kind)
            mr.close()

        # if it is still not there, try the previous month
        if the_date not in self.cached_data:

            month = the_date.month - 1
            year = the_date.year
            if month == 0:  # wrap around to December of the previous year
                month = 12
                year -= 1


            month_date = datetime(year, month, 1)

            mr = MultiRPN(self.year_month_date_to_file_list[month_date])
            self.cached_data.update(mr.get_all_time_records_for_name_and_level(varname=self.varname, level=self.level,
                                                                               level_kind=self.level_kind))
            mr.close()

        return self.cached_data[the_date]
Example #2
    def get_lake_fraction_for_date(self, the_date=None):

        """
        Get the lake ice cover fraction for the specified date
        :param the_date:
        :return:
        """
        if the_date not in self.cached_data:
            month_date = datetime(the_date.year, the_date.month, 1)

            self.cached_data = {}
            mr = MultiRPN(self.year_month_date_to_file_list[month_date])
            self.cached_data = mr.get_all_time_records_for_name_and_level(
                varname=self.varname, level=self.level, level_kind=self.level_kind
            )
            mr.close()

        # if it is still not there, try the previous month
        if the_date not in self.cached_data:

            month = the_date.month - 1
            year = the_date.year
            if month == 0:  # wrap around to December of the previous year
                month = 12
                year -= 1

            month_date = datetime(year, month, 1)

            mr = MultiRPN(self.year_month_date_to_file_list[month_date])
            self.cached_data.update(
                mr.get_all_time_records_for_name_and_level(
                    varname=self.varname, level=self.level, level_kind=self.level_kind
                )
            )
            mr.close()

        return self.cached_data[the_date]
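
The two variants above differ only in formatting; both hinge on MultiRPN.get_all_time_records_for_name_and_level, which returns a dict mapping datetime objects to 2D fields. Below is a minimal standalone sketch of that call; the import paths and the sample path are assumptions, not shown in the examples.

# Minimal sketch of the MultiRPN read pattern used above.
# The import paths follow the usual layout of the rpn package and the data path
# is hypothetical -- adjust both to your installation.
from rpn.rpn_multi import MultiRPN   # assumed import path
from rpn import level_kinds          # assumed import path

mr = MultiRPN("/path/to/Samples/*_198104/dm*")   # hypothetical glob
date_to_field = mr.get_all_time_records_for_name_and_level(
    varname="TT", level=1, level_kind=level_kinds.HYBRID
)
mr.close()

# date_to_field maps datetime -> 2D numpy field
for d in sorted(date_to_field)[:3]:
    print(d, date_to_field[d].shape)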
Example #3
def extract_runoff_to_nc_process(args):
    in_path, out_path = args

    if os.path.exists(out_path):
        print("Nothing to do for: {}".format(out_path))
        return  # skip files that already exist


    traf_name = "TRAF"
    tdra_name = "TDRA"

    r = MultiRPN(in_path)
    traf_data = r.get_all_time_records_for_name_and_level(varname=traf_name, level=5, level_kind=level_kinds.ARBITRARY)
    tdra_data = r.get_all_time_records_for_name_and_level(varname=tdra_name, level=5, level_kind=level_kinds.ARBITRARY)
    r.close()

    nx, ny = list(traf_data.items())[0][1].shape

    with nc.Dataset(out_path, "w", format="NETCDF3_CLASSIC") as ds:
        ds.createDimension("lon", nx)
        ds.createDimension("lat", ny)
        ds.createDimension("time", None)

        varTraf = ds.createVariable(traf_name, "f4", dimensions=("time", "lon", "lat"))
        varTraf.units = "kg/( m**2 * s )"

        varTdra = ds.createVariable(tdra_name, "f4", dimensions=("time", "lon", "lat"))
        varTdra.units = "kg/( m**2 * s )"

        timeVar = ds.createVariable("time", "f4", dimensions=("time",))

        sorted_dates = list(sorted(traf_data.keys()))

        timeVar.units = "hours since {0}".format(sorted_dates[0])
        timeVar[:] = nc.date2num(sorted_dates, timeVar.units)

        varTraf[:] = np.array(
            [traf_data[d] for d in sorted_dates]
        )

        varTdra[:] = np.array(
            [tdra_data[d] for d in sorted_dates]
        )
Example #4
def extract_runoff_to_nc_process(args):
    in_path, out_path = args

    if os.path.exists(out_path):
        print("Nothing to do for: {}".format(out_path))
        return  # skip files that already exist


    traf_name = "TRAF"
    tdra_name = "TDRA"

    r = MultiRPN(in_path)
    traf_data = r.get_all_time_records_for_name_and_level(varname=traf_name, level=5, level_kind=level_kinds.ARBITRARY)
    tdra_data = r.get_all_time_records_for_name_and_level(varname=tdra_name, level=5, level_kind=level_kinds.ARBITRARY)
    r.close()

    nx, ny = list(traf_data.items())[0][1].shape

    with nc.Dataset(out_path, "w", format="NETCDF3_CLASSIC") as ds:
        ds.createDimension("lon", nx)
        ds.createDimension("lat", ny)
        ds.createDimension("time", None)

        varTraf = ds.createVariable(traf_name, "f4", dimensions=("time", "lon", "lat"))
        varTraf.units = "kg/( m**2 * s )"

        varTdra = ds.createVariable(tdra_name, "f4", dimensions=("time", "lon", "lat"))
        varTdra.units = "kg/( m**2 * s )"

        timeVar = ds.createVariable("time", "f4", dimensions=("time",))

        sorted_dates = list(sorted(traf_data.keys()))

        timeVar.units = "hours since {0}".format(sorted_dates[0])
        timeVar[:] = nc.date2num(sorted_dates, timeVar.units)

        varTraf[:] = np.array(
            [traf_data[d] for d in sorted_dates]
        )

        varTdra[:] = np.array(
            [tdra_data[d] for d in sorted_dates]
        )
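
The (in_path, out_path) tuple argument suggests the function is meant to be mapped over many file pairs, e.g. with multiprocessing. A possible driver is sketched below; the directory names and the input/output pairing scheme are hypothetical.

# Hypothetical driver for extract_runoff_to_nc_process (a sketch, not part of the original code).
import glob
import os
from multiprocessing import Pool

if __name__ == "__main__":
    in_dir = "/path/to/rpn/monthly/files"    # hypothetical input folder
    out_dir = "/path/to/netcdf/output"       # hypothetical output folder

    in_paths = sorted(glob.glob(os.path.join(in_dir, "pm*")))
    args_list = [(p, os.path.join(out_dir, os.path.basename(p) + ".nc")) for p in in_paths]

    with Pool(processes=4) as pool:
        pool.map(extract_runoff_to_nc_process, args_list)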
Example #5
def get_seasonal_means_from_rpn_monthly_folders(samples_dir="", season_to_months=None, start_year=-np.Inf, end_year=np.Inf,
                                                filename_prefix="pm", varname="", level=-1, level_kind=-1):

    result = OrderedDict()

    season_to_files = {s: [] for s in season_to_months}

    smpls_dir = Path(samples_dir)


    for month_folder in smpls_dir.iterdir():


        y, m = get_year_and_month(month_folder.name)


        # skip if the year is not in the selected range
        if not start_year <= y <= end_year:
            continue

        for season, months in season_to_months.items():
            if m in months:

                for data_file in month_folder.iterdir():

                    # skip the timestep-0 files (names ending with 8 zeros before the final character)
                    if data_file.name[-9:-1] == 8 * "0":
                        continue

                    if data_file.name.startswith(filename_prefix):
                        season_to_files[season].append(str(data_file))



    for season, months in season_to_months.items():
        print("{} => {}".format(season, months))
        mrpn = MultiRPN(season_to_files[season])
        date_to_field = mrpn.get_all_time_records_for_name_and_level(varname=varname, level=level, level_kind=level_kind)
        result[season] = np.mean([field for field in date_to_field.values()], axis=0)

    return result
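
A possible invocation of the function above is sketched below; the samples path, the years and the variable name are hypothetical.

# Hypothetical call to get_seasonal_means_from_rpn_monthly_folders (a sketch).
from collections import OrderedDict

season_to_months = OrderedDict([
    ("DJF", [12, 1, 2]),
    ("JJA", [6, 7, 8]),
])

season_to_mean = get_seasonal_means_from_rpn_monthly_folders(
    samples_dir="/path/to/Samples",     # hypothetical path
    season_to_months=season_to_months,
    start_year=1981, end_year=1990,
    filename_prefix="pm",
    varname="STFL",                     # level/level_kind left at their defaults
)

for season, field in season_to_mean.items():
    print(season, field.shape)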
Example #6
def main():
    path_to_nemo_outputs = "/RESCUE/skynet3_rech1/huziy/one_way_coupled_nemo_outputs_1979_1985/GLK_1d_grid_T.nc"
    vname_nemo = "sosstsst"

    path_to_crcm5_outputs = "/home/huziy/skynet3_rech1/glk-oneway-coupled-crcm5-outputs/coupled-GL/Samples"
    vname_crcm5 = "TT"

    # month of interest
    month = 4
    year = 1981

    # zero-pad the month so the glob matches monthly folders named like *_198104
    mrpn = MultiRPN("{}/*_{}{:02d}/dm*".format(path_to_crcm5_outputs, year, month))
    data = mrpn.get_all_time_records_for_name_and_level(varname=vname_crcm5, level=1, level_kind=level_kinds.HYBRID)

    # Calculate the monthly mean fields in both cases
    assert isinstance(data, dict)
    mm_crcm5 = np.array(list(data.values())).mean(axis=0)

    print("crcm5-out-shape = ", mm_crcm5.shape)
    with Dataset(path_to_nemo_outputs) as ds:
        mm_nemo = ds.variables[vname_nemo][:].mean(axis=0)

    print("nemo-out-shape = ", mm_nemo.shape)
Example #7
def main():
    path_to_nemo_outputs = "/RESCUE/skynet3_rech1/huziy/one_way_coupled_nemo_outputs_1979_1985/GLK_1d_grid_T.nc"
    vname_nemo = "sosstsst"

    path_to_crcm5_outputs = "/home/huziy/skynet3_rech1/glk-oneway-coupled-crcm5-outputs/coupled-GL/Samples"
    vname_crcm5 = "TT"

    # month of interest
    month = 4
    year = 1981

    # zero-pad the month so the glob matches monthly folders named like *_198104
    mrpn = MultiRPN("{}/*_{}{:02d}/dm*".format(path_to_crcm5_outputs, year, month))
    data = mrpn.get_all_time_records_for_name_and_level(
        varname=vname_crcm5, level=1, level_kind=level_kinds.HYBRID)

    # Calculate the monthly mean fields in both cases
    assert isinstance(data, dict)
    mm_crcm5 = np.array(list(data.values())).mean(axis=0)

    print("crcm5-out-shape = ", mm_crcm5.shape)
    with Dataset(path_to_nemo_outputs) as ds:
        mm_nemo = ds.variables[vname_nemo][:].mean(axis=0)

    print("nemo-out-shape = ", mm_nemo.shape)
Example #8
def main():
    #path = "/RECH/data/Simulations/CRCM5/North_America/NorthAmerica_0.44deg_ERA40-Int_B1/Diagnostics/NorthAmerica_0.44deg_ERA40-Int_B1_2007{:02d}"
    path = "/RESCUE/skynet3_rech1/huziy/from_guillimin/new_outputs/current_climate_30_yr_sims/quebec_0.1_crcm5-hcd-rl-intfl_ITFS/Samples/quebec_crcm5-hcd-rl-intfl_1988{:02d}"

    months = [6, 7, 8]

    pm_list = []
    dm_list = []
    for m in months:
        print(path.format(m))

        month_folder = path.format(m)
        for fn in os.listdir(month_folder):

            # if not fn.endswith("moyenne"):
            #    continue

            if fn.startswith("pm"):
                pm_list.append(os.path.join(month_folder, fn))
            elif fn.startswith("dm"):
                dm_list.append(os.path.join(month_folder, fn))



    pm = MultiRPN(pm_list)
    dm = MultiRPN(dm_list)

    tsurf_mean = np.mean([field for field in pm.get_all_time_records_for_name_and_level(varname="J8").values()], axis=0)
    tair_mean = np.mean([field for field in dm.get_all_time_records_for_name_and_level(varname="TT", level=1, level_kind=level_kinds.HYBRID).values()], axis=0)


    lons, lats = pm.get_longitudes_and_latitudes_of_the_last_read_rec()

    projparams = pm.linked_robj_list[0].get_proj_parameters_for_the_last_read_rec()

    rll = RotatedLatLon(**projparams)
    bmp = rll.get_basemap_object_for_lons_lats(lons2d=lons, lats2d=lats)

    xx, yy = bmp(lons, lats)


    plt.figure()
    cs = bmp.contourf(xx, yy, tsurf_mean - 273.15, 40)
    bmp.drawcoastlines()
    plt.title("Tsurf")
    plt.colorbar()

    plt.figure()
    bmp.contourf(xx, yy, tair_mean, levels=cs.levels, norm=cs.norm, cmap=cs.cmap)
    bmp.drawcoastlines()
    plt.title("Tair")
    plt.colorbar()

    plt.figure()
    bmp.contourf(xx, yy, tsurf_mean - 273.15 - tair_mean, levels=np.arange(-2, 2.2, 0.2), cmap=cs.cmap)
    bmp.drawcoastlines()
    plt.title("Tsurf - Tair")
    plt.colorbar()



    pm.close()
    dm.close()

    plt.show()
Example #9
def main(
        sim_path="/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected/",
        fpath_pattern="var_per_file",
        fname_prefix=None,
        coords_filename="pm1979010100_00000000p"):

    start_year = 1980
    end_year = 1982

    vname = "SD"

    level = 6
    level_kind = level_kinds.ARBITRARY

    lkfr = get_lake_fraction(sim_path, fname=coords_filename)

    data_series = []

    year_to_paths_cache = defaultdict(list)

    for y in range(start_year, end_year + 1):

        # reset the collected records for each year
        data = None
        if fpath_pattern == "var_per_file":
            r = MultiRPN("{}/*{}*/*{}*".format(sim_path, y, vname))
            data = r.get_all_time_records_for_name_and_level(
                vname, level=level, level_kind=level_kind)
        elif fpath_pattern == "default":

            sim_dir = Path(sim_path)

            # Build the year -> month-folder map once so it can be reused for every year
            # (monthly folder names are assumed to end with YYYYMM)
            if len(year_to_paths_cache) == 0:
                for month_dir in sim_dir.iterdir():
                    if month_dir.name[-6:].isdigit():
                        year_to_paths_cache[int(month_dir.name[-6:-2])].append(month_dir)

            for month_dir in year_to_paths_cache[y]:
                for fpath in month_dir.iterdir():

                    # print(fpath)

                    # Check if the prefix is OK
                    if not fpath.name.startswith(fname_prefix):
                        continue

                    # Check if this is not a timestep 0
                    if fpath.name[:-1].endswith(8 * "0"):
                        continue

                    with RPN(str(fpath)) as r:
                        data_tmp = r.get_all_time_records_for_name_and_level(
                            vname, level=level, level_kind=level_kind)
                        if data is None:
                            data = data_tmp
                        else:
                            data.update(data_tmp)

        else:
            raise Exception(
                "Unknown file path pattern: {}".format(fpath_pattern))

        ts_for_year = {}
        for d, field in data.items():
            ts_for_year[d] = field[lkfr > 0.5].mean()

        data_series.append(pd.Series(ts_for_year))

    total_series = pd.concat(data_series)
    assert isinstance(total_series, pd.Series)
    ax = total_series.plot(title="{}-{}".format(start_year, end_year))
    plt.show()
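
Both branches of the function above can be exercised with calls along these lines (a sketch; the paths are hypothetical):

# Hypothetical invocations of main() above (a sketch).
if __name__ == "__main__":
    # one variable per file: files whose names contain the variable name
    main(sim_path="/path/to/Samples_selected/", fpath_pattern="var_per_file")

    # standard Samples layout: pm*/dm* files inside monthly folders
    main(sim_path="/path/to/Samples/", fpath_pattern="default",
         fname_prefix="pm", coords_filename="pm1979010100_00000000p")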
Example #10
def main():
    erainterim_075_folder = "/HOME/data/Validation/ERA-Interim_0.75/Offline_driving_data/3h_Forecast"

    vname = "PR"
    start_year = 1980
    end_year = 2010


    season_key = "summer"
    season_labels = {season_key: "Summer"}
    season_to_months = OrderedDict([
        (season_key, [6, 7, 8])
    ])


    # Validate temperature and precip
    model_vars = ["TT", "PR"]
    obs_vars = ["tmp", "pre"]

    obs_paths = [
        "/HOME/data/Validation/CRU_TS_3.1/Original_files_gzipped/cru_ts_3_10.1901.2009.tmp.dat.nc",
        "/HOME/data/Validation/CRU_TS_3.1/Original_files_gzipped/cru_ts_3_10.1901.2009.pre.dat.nc"
    ]

    model_var_to_obs_var = dict(zip(model_vars, obs_vars))
    model_var_to_obs_path = dict(zip(model_vars, obs_paths))

    obs_path = model_var_to_obs_path[vname]

    cru = CRUDataManager(var_name=model_var_to_obs_var[vname], path=obs_path)

    # Calculate seasonal means for CRU
    seasonal_clim_fields_cru = cru.get_seasonal_means(season_name_to_months=season_to_months,
                                                      start_year=start_year,
                                                      end_year=end_year)



    # Calculate seasonal mean for erai
    flist = get_files_for_season(erainterim_075_folder, start_year=start_year, end_year=end_year, months=season_to_months[season_key])

    rpf = MultiRPN(flist)
    date_to_field_erai075 = rpf.get_all_time_records_for_name_and_level(varname=vname, level=-1)

    # Convert to mm/day (PR is assumed to be in m/s)
    era075 = np.mean([field for field in date_to_field_erai075.values()], axis=0) * 24 * 3600 * 1000
    lons_era, lats_era = rpf.get_longitudes_and_latitudes_of_the_last_read_rec()



    seasonal_clim_fields_cru_interp = OrderedDict()

    # Calculate biases
    for season, cru_field in seasonal_clim_fields_cru.items():
        seasonal_clim_fields_cru_interp[season] = cru.interpolate_data_to(cru_field,
                                                                          lons2d=lons_era,
                                                                          lats2d=lats_era,
                                                                          nneighbours=1)




    # Do the plotting ------------------------------------------------------------------------------
    plot_utils.apply_plot_params()
    fig = plt.figure()


    b = Basemap()
    gs = gridspec.GridSpec(nrows=3, ncols=1)


    ax = fig.add_subplot(gs[0, 0])
    xx, yy = b(cru.lons2d, cru.lats2d)
    cs = b.contourf(xx, yy, seasonal_clim_fields_cru[season_key], 20)
    b.drawcoastlines(ax=ax)
    ax.set_title("CRU")
    plt.colorbar(cs, ax=ax)


    ax = fig.add_subplot(gs[1, 0])



    lons_era[lons_era > 180] -= 360
    lons_era, era075 = b.shiftdata(lons_era, datain=era075, lon_0=0)
    xx, yy = b(lons_era, lats_era)

    # mask oceans in the era plot as well
    era075 = maskoceans(lons_era, lats_era, era075)

    cs = b.contourf(xx, yy, era075, levels=cs.levels, norm=cs.norm, cmap=cs.cmap, ax=ax)
    b.drawcoastlines(ax=ax)
    ax.set_title("ERA-Interim 0.75")
    plt.colorbar(cs, ax=ax)


    # differences
    ax = fig.add_subplot(gs[2, 0])
    diff = era075 - seasonal_clim_fields_cru_interp[season_key]
    delta = np.percentile(np.abs(diff)[~diff.mask], 90)
    clevs = np.linspace(-delta, delta, 20)

    cs = b.contourf(xx, yy, diff, levels=clevs, cmap="RdBu_r", extend="both")
    b.drawcoastlines(ax=ax)
    ax.set_title("ERA-Interim 0.75 - CRU")
    plt.colorbar(cs, ax=ax)

    plt.show()

    fig.savefig(os.path.join(img_folder, "erai0.75_vs_cru_precip.png"), bbox_inches="tight")
Example #11
def main():
    start_year = 1979
    end_year = 1981

    HL_LABEL = "CRCM5_HL"
    NEMO_LABEL = "CRCM5_NEMO"

    file_prefix = "dm"
    level = 1
    level_type = level_kinds.HYBRID

    wind_comp_names = ["UU", "VV"]


    sim_label_to_path = OrderedDict(
        [(HL_LABEL, "/RESCUE/skynet3_rech1/huziy/CNRCWP/C5/2016/2-year-runs/coupled-GL+stfl_oneway/Samples"),
         (NEMO_LABEL, "/HOME/huziy/skynet3_rech1/CNRCWP/C5/2016/2-year-runs/coupled-GL+stfl/Samples")]
    )

    # get a coord file ...
    coord_file = ""
    found_coord_file = False
    for mdir in os.listdir(sim_label_to_path[HL_LABEL]):

        mdir_path = os.path.join(sim_label_to_path[HL_LABEL], mdir)
        if not os.path.isdir(mdir_path):
            continue

        for fn in os.listdir(mdir_path):
            print(fn)
            if fn[:2] not in ["pm", "dm", "pp", "dp"]:
                continue

            coord_file = os.path.join(mdir_path, fn)
            found_coord_file = True

        if found_coord_file:
            break

    bmp, lons, lats = nemo_hl_util.get_basemap_obj_and_coords_from_rpn_file(path=coord_file)
    xx, yy = bmp(lons, lats)
    lons[lons > 180] -= 360


    # loop through all files, rotate vectors and save to netcdf
    for sim_label, samples_dir in sim_label_to_path.items():

        samples = Path(samples_dir)
        po = samples.parent

        monthdate_to_path_list = nemo_hl_util.get_monthyeardate_to_paths_map(file_prefix=file_prefix,
                                                                             start_year=start_year, end_year=end_year,
                                                                             samples_dir_path=samples)

        # Netcdf output file to put rotated winds
        po /= "rotated_wind_{}.nc".format(sim_label)

        with Dataset(str(po), "w") as ds:

            ds.createDimension("time", None)
            ds.createDimension("lon", lons.shape[0])
            ds.createDimension("lat", lons.shape[1])

            # create the schema of the output file
            vname_to_ncvar = {}
            for vname in wind_comp_names:
                vname_to_ncvar[vname] = ds.createVariable(vname, "f4", dimensions=("time", "lon", "lat"))
                vname_to_ncvar[vname].units = "knots"

            lons_var = ds.createVariable("lon", "f4", dimensions=("lon", "lat"))
            lats_var = ds.createVariable("lat", "f4", dimensions=("lon", "lat"))
            time_var = ds.createVariable("time", "i8", dimensions=("time",))
            time_var.units = "hours since {:%Y-%m-%d %H:%M:%S}".format(datetime(start_year, 1, 1))

            lons_var[:] = lons
            lats_var[:] = lats


            # use sorted dates
            record_count = 0


            for month_date in sorted(monthdate_to_path_list):
                # select only dm files
                mr = MultiRPN(path=monthdate_to_path_list[month_date])


                vname_to_fields = {}
                for vname in wind_comp_names:
                    vname_to_fields[vname] = mr.get_all_time_records_for_name_and_level(varname=vname, level=level, level_kind=level_type)

                for ti, t in enumerate(sorted(vname_to_fields[wind_comp_names[0]])):
                    time_var[record_count] = date2num(t, time_var.units)

                    uu = vname_to_fields[wind_comp_names[0]][t]
                    vv = vname_to_fields[wind_comp_names[1]][t]

                    uu_rot, vv_rot = rotate_vecs_from_geo_to_rotpole(uu, vv, lons, lats, bmp=bmp)


                    # in knots not in m/s
                    vname_to_ncvar[wind_comp_names[0]][record_count, :, :] = uu_rot
                    vname_to_ncvar[wind_comp_names[1]][record_count, :, :] = vv_rot
                    record_count += 1
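
The file written above can be read back with netCDF4 along these lines; the output path is hypothetical, while the variable names, dimensions and units follow the schema created in the example.

# Sketch: reading back the rotated wind components written by the loop above.
import netCDF4
import numpy as np

with netCDF4.Dataset("/path/to/rotated_wind_CRCM5_NEMO.nc") as ds:  # hypothetical path
    uu = ds.variables["UU"][:]   # (time, lon, lat), in knots
    vv = ds.variables["VV"][:]
    speed = np.hypot(uu, vv)
    print("mean rotated wind speed (knots):", float(speed.mean()))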
Example #12
def plot_monthly_clim_in_a_panel(months=None,
                                 diag_folder="",
                                 vname="STFL",
                                 grid_config=None,
                                 basins_of_interest_shp=""):
    """
    Plots climatologies using diagnostics outputs, not samples
    :param months:
    :param diag_folder:
    :param vname:
    """
    if months is None:
        months = list(range(1, 13))

    diag_path = Path(diag_folder)
    month_to_field = OrderedDict()

    lons, lats, bmp = None, None, None
    data_mask = None

    for m in months:

        r = MultiRPN(
            str(
                diag_path.joinpath(
                    "*{:02d}".format(m)).joinpath("pm*_moyenne")))

        date_to_field = r.get_all_time_records_for_name_and_level(varname=vname)

        the_mean = np.mean([f for f in date_to_field.values()], axis=0)

        the_mean = np.ma.masked_where(the_mean < 0, the_mean)

        month_to_field[m] = the_mean

        if bmp is None:
            lons, lats = r.get_longitudes_and_latitudes_of_the_last_read_rec()

            # get the basemap object
            bmp, data_mask = grid_config.get_basemap_using_shape_with_polygons_of_interest(
                lons, lats, shp_path=basins_of_interest_shp, mask_margin=5)

        r.close()

    fig = plt.figure()

    ncols = 3
    nrows = len(months) // ncols + int(len(months) % ncols != 0)

    gs = GridSpec(nrows=nrows, ncols=ncols + 1)

    xx, yy = bmp(lons, lats)

    clevs = [
        0, 20, 50, 100, 200, 500, 1000, 1500, 3000, 4500, 5000, 7000, 9000
    ]
    bn = BoundaryNorm(clevs, len(clevs) - 1)
    cmap = cm.get_cmap("jet", len(clevs) - 1)
    for m, field in month_to_field.items():
        row = (m - 1) // ncols
        col = (m - 1) % ncols

        ax = fig.add_subplot(gs[row, col])
        ax.set_title(calendar.month_name[m])

        to_plot = np.ma.masked_where(~data_mask, field)
        im = bmp.pcolormesh(xx,
                            yy,
                            to_plot,
                            norm=bn,
                            cmap=cmap,
                            vmin=clevs[0],
                            vmax=clevs[-1])
        bmp.colorbar(im, extend="max")

        bmp.readshapefile(basins_of_interest_shp[:-4],
                          "basins",
                          linewidth=2,
                          color="m",
                          ax=ax)
        bmp.drawcoastlines(ax=ax)

    plt.close(fig)

    # plot annual mean
    ann_mean = np.mean([field for m, field in month_to_field.items()], axis=0)
    fig = plt.figure()
    ax = fig.add_subplot(gs[:, :])
    ax.set_title("Annual mean")

    to_plot = np.ma.masked_where(~data_mask, ann_mean)
    im = bmp.pcolormesh(xx,
                        yy,
                        to_plot,
                        norm=bn,
                        cmap=cmap,
                        vmin=clevs[0],
                        vmax=clevs[-1])
    bmp.colorbar(im, extend="max")

    bmp.readshapefile(basins_of_interest_shp[:-4],
                      "basins",
                      linewidth=2,
                      color="m",
                      ax=ax)
    bmp.drawcoastlines(ax=ax)

    plt.show()
    plt.close(fig)
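
A schematic call to the function above is shown below; grid_config is not constructed in the example, so it appears here as an already-built placeholder object, and the paths are hypothetical.

# Hypothetical call to plot_monthly_clim_in_a_panel (a sketch).
# grid_config is assumed to be an object providing
# get_basemap_using_shape_with_polygons_of_interest(), as used inside the function.
plot_monthly_clim_in_a_panel(
    months=[5, 6, 7, 8, 9, 10],
    diag_folder="/path/to/Diagnostics",            # hypothetical
    vname="STFL",
    grid_config=grid_config,                       # placeholder, construction not shown
    basins_of_interest_shp="/path/to/basins.shp",  # hypothetical
)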
Example #13
def main():
    #path = "/RECH/data/Simulations/CRCM5/North_America/NorthAmerica_0.44deg_ERA40-Int_B1/Diagnostics/NorthAmerica_0.44deg_ERA40-Int_B1_2007{:02d}"
    path = "/RESCUE/skynet3_rech1/huziy/from_guillimin/new_outputs/current_climate_30_yr_sims/quebec_0.1_crcm5-hcd-rl-intfl_ITFS/Samples/quebec_crcm5-hcd-rl-intfl_1988{:02d}"

    months = [6, 7, 8]

    pm_list = []
    dm_list = []
    for m in months:
        print(path.format(m))

        month_folder = path.format(m)
        for fn in os.listdir(month_folder):

            # if not fn.endswith("moyenne"):
            #    continue

            if fn.startswith("pm"):
                pm_list.append(os.path.join(month_folder, fn))
            elif fn.startswith("dm"):
                dm_list.append(os.path.join(month_folder, fn))

    pm = MultiRPN(pm_list)
    dm = MultiRPN(dm_list)

    tsurf_mean = np.mean([
        field for field in pm.get_all_time_records_for_name_and_level(
            varname="J8").values()
    ],
                         axis=0)
    tair_mean = np.mean([
        field for field in dm.get_all_time_records_for_name_and_level(
            varname="TT", level=1, level_kind=level_kinds.HYBRID).values()
    ],
                        axis=0)

    lons, lats = pm.get_longitudes_and_latitudes_of_the_last_read_rec()

    projparams = pm.linked_robj_list[
        0].get_proj_parameters_for_the_last_read_rec()

    rll = RotatedLatLon(**projparams)
    bmp = rll.get_basemap_object_for_lons_lats(lons2d=lons, lats2d=lats)

    xx, yy = bmp(lons, lats)

    plt.figure()
    cs = bmp.contourf(xx, yy, tsurf_mean - 273.15, 40)
    bmp.drawcoastlines()
    plt.title("Tsurf")
    plt.colorbar()

    plt.figure()
    bmp.contourf(xx,
                 yy,
                 tair_mean,
                 levels=cs.levels,
                 norm=cs.norm,
                 cmap=cs.cmap)
    bmp.drawcoastlines()
    plt.title("Tair")
    plt.colorbar()

    plt.figure()
    bmp.contourf(xx,
                 yy,
                 tsurf_mean - 273.15 - tair_mean,
                 levels=np.arange(-2, 2.2, 0.2),
                 cmap=cs.cmap)
    bmp.drawcoastlines()
    plt.title("Tsurf - Tair")
    plt.colorbar()

    pm.close()
    dm.close()

    plt.show()
Example #14
def get_model_data(station_to_model_point, output_path=None, grid_config=None, basins_of_interest_shp="",
                   cell_manager=None, vname=None):
    lons, lats, bmp = None, None, None
    data_mask = None

    monthly_diagnostics_case = False
    if output_path.name.lower().endswith("diagnostics"):
        fname_pattern = "pm*_moyenne"
        monthly_diagnostics_case = True
    else:
        fname_pattern = "pm*p"

    pattern = re.compile(".*" + 8 * "0" + ".*")

    flist = [f for f in glob.glob(str(output_path.joinpath("*").joinpath(fname_pattern))) if pattern.match(f) is None]
    r = MultiRPN(flist)

    date_to_field = r.get_all_time_records_for_name_and_level(varname=vname)
    lons, lats = r.get_longitudes_and_latitudes_of_the_last_read_rec()

    r.close()

    # get the basemap object
    bmp, data_mask = grid_config.get_basemap_using_shape_with_polygons_of_interest(
        lons, lats, shp_path=basins_of_interest_shp, mask_margin=5)

    station_to_model_data = {}  # model data are the pandas timeseries

    stations_to_ignore = []

    for station, model_point in station_to_model_point.items():
        assert isinstance(model_point, ModelPoint)
        assert isinstance(cell_manager, CellManager)
        assert isinstance(station, cehq_station.Station)

        upstream_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(model_point.ix,
                                                                                          model_point.jy)

        # Skip model points and stations with a small number of grid cells upstream
        if upstream_mask.sum() <= 1:
            stations_to_ignore.append(station)
            print("Station {} is ignored, because the number of upstream cells is <= 1.".format(station.id))
            continue

        # Skip model points and stations outside the region of interest
        if not data_mask[model_point.ix, model_point.jy]:
            stations_to_ignore.append(station)
            print("Station {} is ignored, because it is outside of the domain of interest.".format(station.id))
            continue

        # Plot station position
        fig = plt.figure()

        ax = plt.gca()

        lons1, lats1 = lons[upstream_mask > 0.5], lats[upstream_mask > 0.5]
        x1, y1 = bmp(lons1, lats1)

        bmp.drawrivers()
        bmp.drawcoastlines(ax=ax)
        bmp.drawcountries(ax=ax, linewidth=0.2)
        bmp.drawstates(linewidth=0.1)
        bmp.readshapefile(basins_of_interest_shp[:-4], "basin", linewidth=2, color="m")

        bmp.scatter(x1, y1, c="g", s=100)
        bmp.scatter(*bmp(lons[model_point.ix, model_point.jy], lats[model_point.ix, model_point.jy]), c="b", s=250)

        fig.tight_layout()
        plt.savefig(str(img_folder.joinpath("{}_position_and_upstream.png".format(station.id))), bbox_inches="tight")
        plt.close(fig)


        res = pd.Series(index=sorted(date_to_field.keys()),
                        data=[date_to_field[d][model_point.ix, model_point.jy] for d in sorted(date_to_field.keys())])

        # get monthly means
        res = res.groupby(lambda d: d.replace(day=15, hour=0)).mean()

        if monthly_diagnostics_case:
            # shift back two month-ends and then 15 days forward, i.e. move each value to the middle of the previous month
            res = res.shift(-2, freq="M").shift(15, freq="D")

        print(res.index[:20])
        print(res.index[-20:])

        station_to_model_data[station] = res


    # Remove stations that were ignored above (too few upstream cells or outside the domain)
    for s in stations_to_ignore:
        del station_to_model_point[s]

    return station_to_model_data
Example #15
def main():

    vname = "VV"
    start_year = 1980
    end_year = 2010

    crcm_data_path = "/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5"

    months_of_interest = [6, 7, 8]  # summer


    summer_crcm = analysis.get_seasonal_climatology(hdf_path=crcm_data_path, start_year=start_year, end_year=end_year,
                                                    level=0, var_name=vname, months=months_of_interest)

    bmp_info = analysis.get_basemap_info_from_hdf(file_path=crcm_data_path)


    erainterim_15_folder = "/RECH/data/Driving_data/Pilots/ERA-Interim_1.5/Pilots/"


    flist_for_season = get_files_for_season(erainterim_15_folder, start_year=start_year, end_year=end_year, months=months_of_interest)

    rpf = MultiRPN(flist_for_season)

    date_to_hu_erai15 = rpf.get_all_time_records_for_name_and_level(varname=vname, level=1000, level_kind=level_kinds.PRESSURE)
    summer_era15 = np.mean([field for field in date_to_hu_erai15.values()], axis=0)

    lons_era, lats_era = rpf.get_longitudes_and_latitudes_of_the_last_read_rec()

    # plotting

    # ERA-Interim
    plt.figure()
    b = Basemap(lon_0=180)
    xxg, yyg = b(lons_era, lats_era)
    im = b.contourf(xxg, yyg, summer_era15, 40, zorder=1)


    lonsr = bmp_info.lons.copy()
    lonsr[lonsr < 180] += 360
    xxr, yyr = b(lonsr, bmp_info.lats)
    b.contourf(xxr, yyr, summer_crcm, levels=im.levels, norm=im.norm, cmap=im.cmap, zorder=2)

    b.drawcoastlines(zorder=3)
    plt.colorbar(im)

    # CRCM (plot both crcm and era on the same plot)
    fig = plt.figure()
    xx, yy = bmp_info.get_proj_xy()

    margin = 20
    bext = bmp_info.basemap_for_extended_region(marginx=10 * margin, marginy=10 * margin)
    bmiddle = bmp_info.basemap_for_extended_region(marginx=9 * margin, marginy=9 * margin)

    xxg, yyg = bext(lons_era, lats_era)

    outer_domain = (xxg <= bext.urcrnrx) & (xxg >= bext.llcrnrx) & (yyg <= bext.urcrnry) & (yyg >= bext.llcrnry)



    summer_era15 = np.ma.masked_where(~outer_domain, summer_era15)

    im = bext.contourf(xx, yy, summer_crcm, levels=im.levels, norm=im.norm, cmap=im.cmap, zorder=2)
    bmiddle.contourf(xxg, yyg, summer_era15, levels=im.levels, norm=im.norm, cmap=im.cmap, zorder=1)
    bext.drawcoastlines()
    plt.colorbar(im)


    # Add a polygon
    ax = plt.gca()
    coords = np.array([
        [xx[0, 0], yy[0, 0]], [xx[0, -1], yy[0, -1]], [xx[-1, -1], yy[-1, -1]], [xx[-1, 0], yy[-1, 0]]
    ])
    ax.add_patch(Polygon(coords, facecolor="none", lw=3, zorder=3, edgecolor="k"))




    img_folder = "cc-paper-comments"
    if not os.path.isdir(img_folder):
        os.mkdir(img_folder)

    fig.savefig(os.path.join(img_folder, "{}_era_1.5_and_crcm.png".format(vname)), bbox_inches="tight", transparent=True)

    plt.show()
Example #16
def main(sim_path="/RECH2/huziy/coupling/GL_440x260_0.1deg_GL_with_Hostetler/Samples_selected/",
         fpath_pattern="var_per_file",
         fname_prefix=None, coords_filename="pm1979010100_00000000p"):



    start_year = 1980
    end_year = 1982

    vname = "SD"

    level = 6
    level_kind = level_kinds.ARBITRARY

    lkfr = get_lake_fraction(sim_path, fname=coords_filename)


    data_series = []

    year_to_paths_cache = defaultdict(list)

    for y in range(start_year, end_year + 1):

        # reset the collected records for each year
        data = None
        if fpath_pattern == "var_per_file":
            r = MultiRPN("{}/*{}*/*{}*".format(sim_path, y, vname))
            data = r.get_all_time_records_for_name_and_level(vname, level=level, level_kind=level_kind)
        elif fpath_pattern == "default":

            sim_dir = Path(sim_path)

            # Build the year -> month-folder map once so it can be reused for every year
            # (monthly folder names are assumed to end with YYYYMM)
            if len(year_to_paths_cache) == 0:
                for month_dir in sim_dir.iterdir():
                    if month_dir.name[-6:].isdigit():
                        year_to_paths_cache[int(month_dir.name[-6:-2])].append(month_dir)

            for month_dir in year_to_paths_cache[y]:
                for fpath in month_dir.iterdir():

                    # print(fpath)

                    # Check if the prefix is OK
                    if not fpath.name.startswith(fname_prefix):
                        continue


                    # Check if this is not a timestep 0
                    if fpath.name[:-1].endswith(8 * "0"):
                        continue

                    with RPN(str(fpath)) as r:
                        data_tmp = r.get_all_time_records_for_name_and_level(vname, level=level, level_kind=level_kind)
                        if data is None:
                            data = data_tmp
                        else:
                            data.update(data_tmp)


        else:
            raise Exception("Unknown file path pattern: {}".format(fpath_pattern))



        ts_for_year = {}
        for d, field in data.items():
            ts_for_year[d] = field[lkfr > 0.5].mean()

        data_series.append(pd.Series(ts_for_year))

    total_series = pd.concat(data_series)
    assert isinstance(total_series, pd.Series)
    ax = total_series.plot(title="{}-{}".format(start_year, end_year))
    plt.show()
Example #17
def plot_monthly_clim_in_a_panel(months=None, diag_folder="", vname="STFL",
                                 grid_config=None, basins_of_interest_shp=""):
    """
    Plots climatologies using diagnostics outputs, not samples
    :param months:
    :param diag_folder:
    :param vname:
    """
    if months is None:
        months = list(range(1, 13))


    diag_path = Path(diag_folder)
    month_to_field = OrderedDict()

    lons, lats, bmp = None, None, None
    data_mask = None

    for m in months:

        r = MultiRPN(str(diag_path.joinpath("*{:02d}".format(m)).joinpath("pm*_moyenne")))

        date_to_field = r.get_all_time_records_for_name_and_level(varname=vname)

        the_mean = np.mean([f for f in date_to_field.values()], axis=0)

        the_mean = np.ma.masked_where(the_mean < 0, the_mean)

        month_to_field[m] = the_mean

        if bmp is None:
            lons, lats = r.get_longitudes_and_latitudes_of_the_last_read_rec()

            # get the basemap object
            bmp, data_mask = grid_config.get_basemap_using_shape_with_polygons_of_interest(
                lons, lats, shp_path=basins_of_interest_shp, mask_margin=5)

        r.close()



    fig = plt.figure()


    ncols = 3
    nrows = len(months) // ncols + int(len(months) % ncols != 0)

    gs = GridSpec(nrows=nrows, ncols=ncols + 1)


    xx, yy = bmp(lons, lats)

    clevs = [0, 20, 50, 100, 200, 500, 1000, 1500, 3000, 4500, 5000, 7000, 9000]
    bn = BoundaryNorm(clevs, len(clevs) - 1)
    cmap = cm.get_cmap("jet", len(clevs) - 1)
    for m, field in month_to_field.items():
        row = (m - 1) // ncols
        col = (m - 1) % ncols

        ax = fig.add_subplot(gs[row, col])
        ax.set_title(calendar.month_name[m])

        to_plot = np.ma.masked_where(~data_mask, field)
        im = bmp.pcolormesh(xx, yy, to_plot, norm=bn, cmap=cmap, vmin=clevs[0], vmax=clevs[-1])
        bmp.colorbar(im, extend="max")

        bmp.readshapefile(basins_of_interest_shp[:-4], "basins", linewidth=2, color="m", ax=ax)
        bmp.drawcoastlines(ax=ax)


    plt.close(fig)

    # plot annual mean
    ann_mean = np.mean([field for m, field in month_to_field.items()], axis=0)
    fig = plt.figure()
    ax = fig.add_subplot(gs[:, :])
    ax.set_title("Annual mean")

    to_plot = np.ma.masked_where(~data_mask, ann_mean)
    im = bmp.pcolormesh(xx, yy, to_plot, norm=bn, cmap=cmap, vmin=clevs[0], vmax=clevs[-1])
    bmp.colorbar(im, extend="max")

    bmp.readshapefile(basins_of_interest_shp[:-4], "basins", linewidth=2, color="m", ax=ax)
    bmp.drawcoastlines(ax=ax)

    plt.show()
    plt.close(fig)
Example #18
def get_area_avg_timeseries(samples_dir, start_year=-np.Inf, end_year=np.Inf, filename_prefix="pm",
                            level=-1, level_kind=level_kinds.ARBITRARY,
                            varname="", mask=None, mask_lons2d=None, mask_lats2d=None, file_per_var=False) -> pd.Series:
    """
    get the timeseries of area averaged ice fraction
    :rtype : pd.Series
    """

    yearly_ts = []


    lons2d, lats2d = None, None
    samples_dir_p = Path(samples_dir)


    # interpolated mask
    interpolated_mask = None


    for y in range(start_year, end_year + 1):
        files_for_year = []

        mfolders = [f for f in samples_dir_p.iterdir() if f.name[:-2].endswith(str(y))]

        for mfolder in mfolders:
            # Select all files containing the varname in the filename
            if file_per_var:
                files_for_year += [str(f) for f in mfolder.iterdir() if varname in f.name]
            else:
                files_for_year += [str(f) for f in mfolder.iterdir() if f.name.startswith(filename_prefix) and f.name[-9:-1] != "0" * 8]


        if len(files_for_year) == 0:
            continue

        mrpn = MultiRPN(files_for_year)
        data = mrpn.get_all_time_records_for_name_and_level(varname=varname, level=level, level_kind=level_kind)

        if lons2d is None:
            lons2d, lats2d = mrpn.get_longitudes_and_latitudes_of_the_last_read_rec()


        # interpolate the mask using nearest neighbour approach
        if interpolated_mask is None:
            xs, ys, zs = lat_lon.lon_lat_to_cartesian(mask_lons2d.flatten(), mask_lats2d.flatten())
            ktree = KDTree(data=list(zip(xs, ys, zs)))

            xt, yt, zt = lat_lon.lon_lat_to_cartesian(lons2d.flatten(), lats2d.flatten())
            dists, inds = ktree.query(list(zip(xt, yt, zt)), k=1)

            interpolated_mask = mask.flatten()[inds]
            interpolated_mask.shape = lons2d.shape


        for t, field in data.items():
            data[t] = field[interpolated_mask].mean()

        tlist = [t for t in data.keys()]
        ser = pd.Series(index=tlist, data=[data[t] for t in tlist])
        ser.sort_index(inplace=True)
        yearly_ts.append(ser)
        mrpn.close()

    return pd.concat(yearly_ts), lons2d, lats2d
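
A possible call to the function above is sketched below. The mask and its coordinates would normally come from another gridded dataset (e.g. a lake-fraction field), so small placeholder arrays are built here just to keep the sketch self-contained; the samples path, the variable name and the rpn import path are also assumptions.

# Hypothetical call to get_area_avg_timeseries (a sketch).
import numpy as np
from rpn import level_kinds   # assumed import path

# Placeholder mask and 2D coordinates standing in for a real gridded mask.
lake_lons2d, lake_lats2d = np.meshgrid(np.linspace(-90.0, -60.0, 50), np.linspace(40.0, 55.0, 40))
lake_mask = np.ones_like(lake_lons2d, dtype=bool)

ice_series, lons2d, lats2d = get_area_avg_timeseries(
    "/path/to/Samples",                  # hypothetical path
    start_year=1980, end_year=1985,
    filename_prefix="pm",
    varname="LC",                        # hypothetical variable name
    level=-1, level_kind=level_kinds.ARBITRARY,
    mask=lake_mask, mask_lons2d=lake_lons2d, mask_lats2d=lake_lats2d,
)

ice_series.plot(title="Area-averaged timeseries")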
Example #19
def main():
    erainterim_075_folder = "/HOME/data/Validation/ERA-Interim_0.75/Offline_driving_data/3h_Forecast"

    vname = "PR"
    start_year = 1980
    end_year = 2010

    season_key = "summer"
    season_labels = {season_key: "Summer"}
    season_to_months = OrderedDict([(season_key, [6, 7, 8])])

    # Validate temperature and precip
    model_vars = ["TT", "PR"]
    obs_vars = ["tmp", "pre"]

    obs_paths = [
        "/HOME/data/Validation/CRU_TS_3.1/Original_files_gzipped/cru_ts_3_10.1901.2009.tmp.dat.nc",
        "/HOME/data/Validation/CRU_TS_3.1/Original_files_gzipped/cru_ts_3_10.1901.2009.pre.dat.nc",
    ]

    model_var_to_obs_var = dict(zip(model_vars, obs_vars))
    model_var_to_obs_path = dict(zip(model_vars, obs_paths))

    obs_path = model_var_to_obs_path[vname]

    cru = CRUDataManager(var_name=model_var_to_obs_var[vname], path=obs_path)

    # Calculate seasonal means for CRU
    seasonal_clim_fields_cru = cru.get_seasonal_means(
        season_name_to_months=season_to_months, start_year=start_year, end_year=end_year
    )

    # Calculate seasonal mean for erai
    flist = get_files_for_season(
        erainterim_075_folder, start_year=start_year, end_year=end_year, months=season_to_months[season_key]
    )

    rpf = MultiRPN(flist)
    date_to_field_erai075 = rpf.get_all_time_records_for_name_and_level(varname=vname, level=-1)

    # Convert to mm/day (PR is assumed to be in m/s)
    era075 = np.mean([field for field in date_to_field_erai075.values()], axis=0) * 24 * 3600 * 1000
    lons_era, lats_era = rpf.get_longitudes_and_latitudes_of_the_last_read_rec()

    seasonal_clim_fields_cru_interp = OrderedDict()

    # Calculate biases
    for season, cru_field in seasonal_clim_fields_cru.items():
        seasonal_clim_fields_cru_interp[season] = cru.interpolate_data_to(
            cru_field, lons2d=lons_era, lats2d=lats_era, nneighbours=1
        )

    # Do the plotting ------------------------------------------------------------------------------
    plot_utils.apply_plot_params()
    fig = plt.figure()

    b = Basemap()
    gs = gridspec.GridSpec(nrows=3, ncols=1)

    ax = fig.add_subplot(gs[0, 0])
    xx, yy = b(cru.lons2d, cru.lats2d)
    cs = b.contourf(xx, yy, seasonal_clim_fields_cru[season_key], 20)
    b.drawcoastlines(ax=ax)
    ax.set_title("CRU")
    plt.colorbar(cs, ax=ax)

    ax = fig.add_subplot(gs[1, 0])

    lons_era[lons_era > 180] -= 360
    lons_era, era075 = b.shiftdata(lons_era, datain=era075, lon_0=0)
    xx, yy = b(lons_era, lats_era)

    # mask oceans in the era plot as well
    era075 = maskoceans(lons_era, lats_era, era075)

    cs = b.contourf(xx, yy, era075, levels=cs.levels, norm=cs.norm, cmap=cs.cmap, ax=ax)
    b.drawcoastlines(ax=ax)
    ax.set_title("ERA-Interim 0.75")
    plt.colorbar(cs, ax=ax)

    # differences
    ax = fig.add_subplot(gs[2, 0])
    diff = era075 - seasonal_clim_fields_cru_interp[season_key]
    delta = np.percentile(np.abs(diff)[~diff.mask], 90)
    clevs = np.linspace(-delta, delta, 20)

    cs = b.contourf(xx, yy, diff, levels=clevs, cmap="RdBu_r", extend="both")
    b.drawcoastlines(ax=ax)
    ax.set_title("ERA-Interim 0.75 - CRU")
    plt.colorbar(cs, ax=ax)

    plt.show()

    fig.savefig(os.path.join(img_folder, "erai0.75_vs_cru_precip.png"), bbox_inches="tight")
Example #20
def main():
    start_year = 1979
    end_year = 1981

    HL_LABEL = "CRCM5_HL"
    NEMO_LABEL = "CRCM5_NEMO"

    file_prefix = "dm"
    level = 1
    level_type = level_kinds.HYBRID

    wind_comp_names = ["UU", "VV"]

    sim_label_to_path = OrderedDict([
        (HL_LABEL,
         "/RESCUE/skynet3_rech1/huziy/CNRCWP/C5/2016/2-year-runs/coupled-GL+stfl_oneway/Samples"
         ),
        (NEMO_LABEL,
         "/HOME/huziy/skynet3_rech1/CNRCWP/C5/2016/2-year-runs/coupled-GL+stfl/Samples"
         )
    ])

    # get a coord file ...
    coord_file = ""
    found_coord_file = False
    for mdir in os.listdir(sim_label_to_path[HL_LABEL]):

        mdir_path = os.path.join(sim_label_to_path[HL_LABEL], mdir)
        if not os.path.isdir(mdir_path):
            continue

        for fn in os.listdir(mdir_path):
            print(fn)
            if fn[:2] not in ["pm", "dm", "pp", "dp"]:
                continue

            coord_file = os.path.join(mdir_path, fn)
            found_coord_file = True

        if found_coord_file:
            break

    bmp, lons, lats = nemo_hl_util.get_basemap_obj_and_coords_from_rpn_file(
        path=coord_file)
    xx, yy = bmp(lons, lats)
    lons[lons > 180] -= 360

    # loop through all files, rotate vectors and save to netcdf
    for sim_label, samples_dir in sim_label_to_path.items():

        samples = Path(samples_dir)
        po = samples.parent

        monthdate_to_path_list = nemo_hl_util.get_monthyeardate_to_paths_map(
            file_prefix=file_prefix,
            start_year=start_year,
            end_year=end_year,
            samples_dir_path=samples)

        # Netcdf output file to put rotated winds
        po /= "rotated_wind_{}.nc".format(sim_label)

        with Dataset(str(po), "w") as ds:

            ds.createDimension("time", None)
            ds.createDimension("lon", lons.shape[0])
            ds.createDimension("lat", lons.shape[1])

            # create the schema of the output file
            vname_to_ncvar = {}
            for vname in wind_comp_names:
                vname_to_ncvar[vname] = ds.createVariable(vname,
                                                          "f4",
                                                          dimensions=("time",
                                                                      "lon",
                                                                      "lat"))
                vname_to_ncvar[vname].units = "knots"

            lons_var = ds.createVariable("lon",
                                         "f4",
                                         dimensions=("lon", "lat"))
            lats_var = ds.createVariable("lat",
                                         "f4",
                                         dimensions=("lon", "lat"))
            time_var = ds.createVariable("time", "i8", dimensions=("time", ))
            time_var.units = "hours since {:%Y-%m-%d %H:%M:%S}".format(
                datetime(start_year, 1, 1))

            lons_var[:] = lons
            lats_var[:] = lats

            # use sorted dates
            record_count = 0

            for month_date in sorted(monthdate_to_path_list):
                # select only dm files
                mr = MultiRPN(path=monthdate_to_path_list[month_date])

                vname_to_fields = {}
                for vname in wind_comp_names:
                    vname_to_fields[
                        vname] = mr.get_all_time_records_for_name_and_level(
                            varname=vname, level=level, level_kind=level_type)

                for ti, t in enumerate(
                        sorted(vname_to_fields[wind_comp_names[0]])):
                    time_var[record_count] = date2num(t, time_var.units)

                    uu = vname_to_fields[wind_comp_names[0]][t]
                    vv = vname_to_fields[wind_comp_names[1]][t]

                    uu_rot, vv_rot = rotate_vecs_from_geo_to_rotpole(uu,
                                                                     vv,
                                                                     lons,
                                                                     lats,
                                                                     bmp=bmp)

                    # in knots not in m/s
                    vname_to_ncvar[wind_comp_names[0]][
                        record_count, :, :] = uu_rot
                    vname_to_ncvar[wind_comp_names[1]][
                        record_count, :, :] = vv_rot
                    record_count += 1
Example #21
def get_area_avg_timeseries(samples_dir,
                            start_year=-np.Inf,
                            end_year=np.Inf,
                            filename_prefix="pm",
                            level=-1,
                            level_kind=level_kinds.ARBITRARY,
                            varname="",
                            mask=None,
                            mask_lons2d=None,
                            mask_lats2d=None,
                            file_per_var=False):
    """
    Get the timeseries of the area-averaged field (e.g. ice fraction) over the masked region.
    :return: (pd.Series of area averages, lons2d, lats2d)
    """

    yearly_ts = []

    lons2d, lats2d = None, None
    samples_dir_p = Path(samples_dir)

    # interpolated mask
    interpolated_mask = None

    for y in range(start_year, end_year + 1):
        files_for_year = []

        mfolders = [
            f for f in samples_dir_p.iterdir() if f.name[:-2].endswith(str(y))
        ]

        for mfolder in mfolders:
            # Select all files containing the varname in the filename
            if file_per_var:
                files_for_year += [
                    str(f) for f in mfolder.iterdir() if varname in f.name
                ]
            else:
                files_for_year += [
                    str(f) for f in mfolder.iterdir()
                    if f.name.startswith(filename_prefix)
                    and f.name[-9:-1] != "0" * 8
                ]

        if len(files_for_year) == 0:
            continue

        mrpn = MultiRPN(files_for_year)
        data = mrpn.get_all_time_records_for_name_and_level(
            varname=varname, level=level, level_kind=level_kind)

        if lons2d is None:
            lons2d, lats2d = mrpn.get_longitudes_and_latitudes_of_the_last_read_rec(
            )

        # interpolate the mask using nearest neighbour approach
        if interpolated_mask is None:
            xs, ys, zs = lat_lon.lon_lat_to_cartesian(mask_lons2d.flatten(),
                                                      mask_lats2d.flatten())
            ktree = KDTree(data=list(zip(xs, ys, zs)))

            xt, yt, zt = lat_lon.lon_lat_to_cartesian(lons2d.flatten(),
                                                      lats2d.flatten())
            dists, inds = ktree.query(list(zip(xt, yt, zt)), k=1)

            interpolated_mask = mask.flatten()[inds]
            interpolated_mask.shape = lons2d.shape

        for t, field in data.items():
            data[t] = field[interpolated_mask].mean()

        tlist = [t for t in data.keys()]
        ser = pd.Series(index=tlist, data=[data[t] for t in tlist])
        ser.sort_index(inplace=True)
        yearly_ts.append(ser)
        mrpn.close()

    return pd.concat(yearly_ts), lons2d, lats2d