def point_comparisons_at_outlets(hdf_folder="/home/huziy/skynet3_rech1/hdf_store"):
    """
    Pick the 12 outlets with the largest number of upstream cells and draw the
    model comparison at those points.

    Flow directions, lake fraction and the coordinates are read from the first
    listed simulation file, the slope is read from the second one.

    :param hdf_folder: folder that is joined with each simulation file name and
        also forwarded to draw_model_comparison
    """
    first_year, last_year = 1979, 1981

    # Display name -> hdf file of each simulation to compare
    sim_name_to_file_name = {
        "CRCM5-HCD-RL": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf",
        "CRCM5-HCD-RL-INTFL": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_do_not_discard_small.hdf",
        "SANI=10000": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_sani-10000.hdf"
        # Currently disabled simulations:
        # "CRCM5-R": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-r_spinup.hdf",
        # "CRCM5-HCD-R": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r_spinup2.hdf",
        # "SANI=10000, ignore THFC":
        # "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_sani-10000_not_care_about_thfc.hdf",
        # "CRCM5-HCD-RL-ERA075": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ecoclimap_era075.hdf",
        # "CRCM5-HCD-RL-ECOCLIMAP": "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ecoclimap.hdf"
    }

    file_names = list(sim_name_to_file_name.values())
    first_path = os.path.join(hdf_folder, file_names[0])
    second_path = os.path.join(hdf_folder, file_names[1])

    # Static fields needed for the selection of the outlets
    flow_directions = analysis.get_array_from_file(path=first_path, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lake_fraction = analysis.get_array_from_file(path=first_path, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    slope = analysis.get_array_from_file(path=second_path, var_name=infovar.HDF_SLOPE_NAME)

    lons2d, lats2d, _ = analysis.get_basemap_from_hdf(file_path=first_path)

    cell_manager = CellManager(flow_directions, lons2d=lons2d, lats2d=lats2d)
    outlets = cell_manager.get_model_points_of_outlets(lower_accumulation_index_limit=10)

    assert len(outlets) > 0

    # The accumulation index of an outlet is the sum of its upstream-cells mask;
    # it is used to rank the outlets by importance.
    upstream_counts = []
    for outlet in outlets:
        upstream_mask = cell_manager.get_mask_of_upstream_cells_connected_with_by_indices(outlet.ix, outlet.jy)
        upstream_counts.append(np.sum(upstream_mask))

    for outlet, n_upstream in zip(outlets, upstream_counts):
        outlet.acc_index = n_upstream

    # ascending order: the most important outlets end up at the tail of the list
    outlets.sort(key=lambda outlet: outlet.acc_index)

    def is_acceptable(outlet):
        # Keep cells with lake fraction below 0.6 and a non-negative slope.
        # NOTE(review): the original comment said points with "slopes 0 or less" are
        # discarded, but the condition keeps slope == 0 - confirm which one is intended.
        i, j = outlet.ix, outlet.jy
        return lake_fraction[i, j] < 0.6 and slope[i, j] >= 0

    # the 12 most important outlets among the acceptable ones
    outlets = [outlet for outlet in outlets if is_acceptable(outlet)][-12:]

    print("The following outlets were chosen for analysis")
    for outlet in outlets:
        i, j = outlet.ix, outlet.jy
        print("({0}, {1}): acc_index = {2} cells; fldr = {3}; lake_fraction = {4}".format(
            i, j, outlet.acc_index, cell_manager.flow_directions[i, j], lake_fraction[i, j]))

    draw_model_comparison(model_points=outlets, sim_name_to_file_name=sim_name_to_file_name,
                          hdf_folder=hdf_folder, start_year=first_year, end_year=last_year,
                          cell_manager=cell_manager)
def main():
    """
    Plot the monthly means of the area-averaged TRAF field (level index 0) for
    1980-2010, one line per calendar month, save the figure next to the data
    file and show it.
    """
    data_path = "/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS.hdf5"
    vname, level_index = "TRAF", 0
    start_year, end_year = 1980, 2010

    flow_dirs = analysis.get_array_from_file(data_path, var_name="flow_direction")
    lake_fr = analysis.get_array_from_file(data_path, var_name="lake_fraction")

    # Averaging weights: (1 - lake fraction) where the flow direction is positive,
    # masked everywhere else
    has_flow_dir = flow_dirs > 0
    the_mask = np.ma.masked_all_like(flow_dirs)
    the_mask[has_flow_dir] = (1 - lake_fr)[has_flow_dir]

    area_mean = analysis.get_area_mean_timeseries(hdf_path=data_path, var_name=vname, level_index=level_index,
                                                  start_year=start_year, end_year=end_year, the_mask=the_mask)

    # one value per (year, month), labelled with the 15th of that month
    monthly = area_mean.groupby(lambda d: datetime(d.year, d.month, 15)).mean()

    # do the plotting
    plot_utils.apply_plot_params()
    fig = plt.figure()

    monthly = monthly * 24 * 3600  # convert to mm/day

    # one group (hence one plot call) per calendar month
    monthly.groupby(lambda d: d.month).plot()
    ax = plt.gca()
    assert isinstance(ax, Axes)
    ax.grid()

    # data_path[:-5] drops the ".hdf5" extension
    img_suffix = "_{}_level_index_{}_{}-{}_timeseries.png".format(vname, level_index, start_year, end_year)
    fig.savefig(data_path[:-5] + img_suffix,
                transparent=True, dpi=common_plot_params.FIG_SAVE_DPI, bbox_inches="tight")

    plt.show()
# Example no. 3
# 0
def main():
    """
    Draw the time series of monthly mean, area-averaged TRAF (level index 0)
    over 1980-2010 grouped by calendar month; the image is written beside the
    input file and then displayed.
    """
    data_path = "/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS.hdf5"
    start_year = 1980
    end_year = 2010
    vname = "TRAF"
    level_index = 0

    fldr = analysis.get_array_from_file(data_path, var_name="flow_direction")
    lkfr = analysis.get_array_from_file(data_path, var_name="lake_fraction")

    # Start fully masked, then fill in the non-lake fraction for the cells
    # that have a positive flow direction
    valid_cells = fldr > 0
    the_mask = np.ma.masked_all_like(fldr)
    the_mask[valid_cells] = (1 - lkfr)[valid_cells]

    ts_kwargs = dict(hdf_path=data_path, var_name=vname, level_index=level_index,
                     start_year=start_year, end_year=end_year, the_mask=the_mask)
    ser = analysis.get_area_mean_timeseries(**ts_kwargs)

    def mid_month(d):
        # all dates of the same month of the same year share this key
        return datetime(d.year, d.month, 15)

    monthly_ser = ser.groupby(mid_month).mean()

    # do the plotting
    plot_utils.apply_plot_params()
    fig = plt.figure()

    monthly_ser = monthly_ser * 24 * 3600  # convert to mm/day

    monthly_ser.groupby(lambda d: d.month).plot()
    ax = plt.gca()
    assert isinstance(ax, Axes)
    ax.grid()

    # the image path is the data path without its 5-character ".hdf5" suffix
    # followed by a description of the plotted series
    img_path = data_path[:-5] + "_{}_level_index_{}_{}-{}_timeseries.png".format(
        vname, level_index, start_year, end_year)
    fig.savefig(img_path, transparent=True, dpi=common_plot_params.FIG_SAVE_DPI, bbox_inches="tight")

    plt.show()
# Example no. 4
# 0
def main():
    """
    Compare the model fields TT (Spring) and I5 (Winter) with observations for two
    run configurations and save all panels into a single PNG in img_folder.

    The figure has one row per run configuration and one column per variable;
    the upstream-area outlines and the positions of the selected stations are
    drawn on top of each panel.
    """
    # Define the simulations to be validated
    r_config = RunConfig(
        data_path=
        "/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5",
        start_year=1990,
        end_year=2010,
        label="CRCM5-L1")
    r_config_list = [r_config]

    r_config = RunConfig(
        data_path=
        "/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-r.hdf5",
        start_year=1990,
        end_year=2010,
        label="CRCM5-NL")
    r_config_list.append(r_config)

    # NOTE: from here on (until the plotting loop rebinds it) r_config refers to the
    # second configuration (CRCM5-NL), so the map info, the river network and the
    # observation processing below are all based on that file.
    bmp_info = analysis.get_basemap_info_from_hdf(file_path=r_config.data_path)
    bmp_info.should_draw_grey_map_background = True
    bmp_info.should_draw_basin_boundaries = False
    bmp_info.map_bg_color = "0.75"

    station_ids = ["104001", "093806", "093801", "081002", "081007", "080718"]

    # get river network information used in the model
    flow_directions = analysis.get_array_from_file(
        r_config.data_path, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    accumulation_area_km2 = analysis.get_array_from_file(
        path=r_config.data_path, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    cell_manager = CellManager(flow_dirs=flow_directions,
                               lons2d=bmp_info.lons,
                               lats2d=bmp_info.lats,
                               accumulation_area_km2=accumulation_area_km2)

    # Get the list of stations to indicate on the bias map
    stations = cehq_station.read_station_data(start_date=None,
                                              end_date=None,
                                              selected_ids=station_ids)
    """:type : list[Station]"""

    # Outlines of the areas upstream of the model points matched to the stations
    xx, yy = bmp_info.get_proj_xy()
    station_to_modelpoint = cell_manager.get_model_points_for_stations(
        station_list=stations)
    upstream_edges = cell_manager.get_upstream_polygons_for_points(
        model_point_list=station_to_modelpoint.values(), xx=xx, yy=yy)

    bmp_info.draw_colorbar_for_each_subplot = True

    # Validate temperature and swe
    obs_path_anusplin = "/home/huziy/skynet3_rech1/anusplin_links"
    obs_path_swe = "data/swe_ross_brown/swe.nc"
    model_var_to_obs_path = OrderedDict([("TT", obs_path_anusplin),
                                         ("I5", obs_path_swe)])

    # Each variable is validated for its own single season
    model_var_to_season = OrderedDict([
        ("TT", OrderedDict([("Spring", range(3, 6))])),
        ("I5", OrderedDict([("Winter", [1, 2, 12])]))
    ])

    vname_to_obs_data = {}

    # parameters that won't change in the loop over variable names
    params_const = dict(rconfig=r_config, bmp_info=bmp_info)

    for vname, obs_path in model_var_to_obs_path.items():
        season_to_obs_data = get_seasonal_clim_obs_data(
            vname=vname,
            obs_path=obs_path,
            season_to_months=model_var_to_season[vname],
            **params_const)

        # Mask the observed swe over water (inlands=True is passed to maskoceans),
        # since I5 is calculated only for land
        if vname in [
                "I5",
        ]:
            for season in season_to_obs_data:
                season_to_obs_data[season] = maskoceans(
                    bmp_info.lons,
                    bmp_info.lats,
                    season_to_obs_data[season],
                    inlands=True)

        vname_to_obs_data[vname] = season_to_obs_data

    # Plotting
    plot_all_vars_in_one_fig = True

    # NOTE(review): fig and gs stay None when plot_all_vars_in_one_fig is False, and the
    # loop below would then fail on fig.add_subplot - only the single-figure mode works.
    fig = None
    gs = None
    if plot_all_vars_in_one_fig:
        plot_utils.apply_plot_params(font_size=12,
                                     width_pt=None,
                                     width_cm=25,
                                     height_cm=20)
        fig = plt.figure()
        # one column per variable plus a narrow extra column (relative width 0.05)
        ncols = len(model_var_to_obs_path) + 1
        gs = GridSpec(len(r_config_list),
                      ncols,
                      width_ratios=(ncols - 1) * [
                          1.,
                      ] + [
                          0.05,
                      ])
    else:
        plot_utils.apply_plot_params(font_size=12,
                                     width_pt=None,
                                     width_cm=25,
                                     height_cm=25)

    # Projected station coordinates, computed once on the first panel and then reused
    station_x_list = []
    station_y_list = []

    mvarname_to_cs = {}
    # NOTE: the loop variable rebinds r_config for each row of the figure
    for row, r_config in enumerate(r_config_list):
        for col, mname in enumerate(model_var_to_obs_path):

            # a single map axes per (row, col) cell
            row_axes = [
                fig.add_subplot(gs[row, col]),
            ]

            mvarname_to_cs[mname] = compare_vars(
                vname_model=mname,
                vname_to_obs=vname_to_obs_data,
                r_config=r_config,
                season_to_months=model_var_to_season[mname],
                bmp_info_agg=bmp_info,
                axes_list=row_axes)

            # row_axes contains only the map axes here, so all of them are decorated
            for the_ax in row_axes:

                the_ax.set_title(the_ax.get_title() + ", {}".format(
                    infovar.get_long_display_label_for_var(mname)))
                # Need titles only for the first row
                if row > 0:
                    the_ax.set_title("")

                # The simulation label is shown only in the first column
                if col == 0:
                    the_ax.set_ylabel(r_config.label)
                else:
                    the_ax.set_ylabel("")

                draw_upstream_area_bounds(the_ax, upstream_edges, color="g")

                if len(station_x_list) == 0:
                    for the_station in stations:
                        xst, yst = bmp_info.basemap(the_station.longitude,
                                                    the_station.latitude)
                        station_x_list.append(xst)
                        station_y_list.append(yst)

                bmp_info.basemap.scatter(station_x_list,
                                         station_y_list,
                                         c="g",
                                         ax=the_ax,
                                         s=20,
                                         zorder=10,
                                         alpha=0.5)

    # Save the figure if necessary
    if plot_all_vars_in_one_fig:

        # create the output folder on first use
        if not img_folder.is_dir():
            img_folder.mkdir(parents=True)

        # the file name is built from the validated variable names, e.g. "TT_I5.png"
        fig_path = img_folder.joinpath("{}.png".format(
            "_".join(model_var_to_obs_path)))
        with fig_path.open("wb") as figfile:
            fig.savefig(figfile, format="png", bbox_inches="tight")

        plt.close(fig)
# Example no. 5
# 0
def main():
    """
    Validate the simulated lake ice cover (variable "LC") against observed
    daily climatologies, one panel per lake, and save the figure to img_folder.

    For each lake the model series is averaged over the lake mask weighted by the
    cell area, restricted to the months of observations, converted to a daily
    climatology and multiplied by 100. The observed series are averaged across
    the columns of the table read for each lake.
    """
    start_year = 1980
    end_year = 2003

    months_of_obs = [12, 1, 2, 3, 4, 5]

    r_config = RunConfig(
        data_path="/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5",
        start_year=start_year, end_year=end_year, label="ERAI-CRCM5-L"
    )

    var_name = "LC"
    bmp_info = analysis.get_basemap_info(r_config=r_config)
    lkid_to_mask = get_lake_masks(bmp_info.lons, bmp_info.lats)

    cell_area_m2 = analysis.get_array_from_file(path=r_config.data_path, var_name="cell_area_m2")

    def is_date_of_interest(d):
        """Tell whether the date d takes part in the model climatology."""
        # remove the last December
        if d.year == end_year and d.month == 12:
            return False

        # remove the first Jan and Feb
        if d.year == start_year and d.month in (1, 2):
            return False

        # remove the Feb 29th
        if d.month == 2 and d.day == 29:
            return False

        # select months of interest
        return d.month in months_of_obs

    # read the model data
    lkid_to_ts_model = {}
    for lkid, the_mask in lkid_to_mask.items():
        df = analysis.get_area_mean_timeseries(r_config.data_path, var_name=var_name,
                                               the_mask=the_mask * cell_area_m2,
                                               start_year=start_year, end_year=end_year)

        # Series.select / DataFrame.select no longer exist in pandas >= 1.0,
        # so the rows are filtered with a boolean mask built from the index.
        df = df.loc[[is_date_of_interest(d) for d in df.index]]

        # calculate the climatology: December is mapped to 2001 and the other
        # months to 2002, so December sorts before January-May
        df = df.groupby(lambda d: datetime(2001 if d.month == 12 else 2002, d.month, d.day)).mean()
        df.sort_index(inplace=True)

        lkid_to_ts_model[lkid] = df * 100

    # read obs data and calculate climatology
    lkid_to_ts_obs = {}
    for lkid in LAKE_IDS:
        obs_table = GL_obs_timeseries.get_ts_from_file(
            path=os.path.join(OBS_DATA_FOLDER, "{}-30x.TXT".format(lkid)),
            start_year=start_year, end_year=end_year - 1)

        # get the climatology (mean across the columns of the table)
        dfm = obs_table.mean(axis=1)

        # NOTE(review): the index values are treated as day numbers counted from
        # 2001-01-01, whereas the model climatology above puts Jan-May into 2002;
        # confirm that the observation files number the days accordingly.
        dfm.index = [datetime(2001, 1, 1) + timedelta(days=int(jd - 1)) for jd in dfm.index]

        lkid_to_ts_obs[lkid] = dfm

    # plotting
    plot_utils.apply_plot_params(font_size=10)
    fig = plt.figure()
    gs = GridSpec(nrows=len(lkid_to_ts_model), ncols=2)

    for row, lkid in enumerate(lkid_to_ts_model):

        ax = fig.add_subplot(gs[row, 0])

        mod = lkid_to_ts_model[lkid]
        obs = lkid_to_ts_obs[lkid]

        print(obs.index)
        print(obs.values)

        ax.plot(mod.index, mod.values, label=r_config.label, color="r", lw=2)
        ax.plot(obs.index, obs.values, label="NOAA NIC/CIS", color="k", lw=2)

        # the legend is the same for all the panels, show it only once
        if row == 0:
            ax.legend()

        ax.set_title(lkid)

        ax.xaxis.set_major_formatter(DateFormatter("%b"))

    fig.tight_layout()
    fig.savefig(os.path.join(img_folder, "GL_ice-cover-validation.png"), bbox_inches="tight",
                dpi=common_plot_params.FIG_SAVE_DPI)
def main():
    """
    Compare the seasonal means of the model fields TT and I5 with observations
    for one run configuration and save all panels into a single PNG in img_folder.

    The figure has one row per variable and one column per season, plus a last
    narrow column (relative width 0.05). The upstream-area outlines and the
    positions of the selected stations are drawn on top of each map panel.
    """
    season_to_months = DEFAULT_SEASON_TO_MONTHS

    r_config = RunConfig(
        data_path="/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5",
        start_year=1990, end_year=2010, label="CRCM5-L"
    )

    bmp_info = analysis.get_basemap_info_from_hdf(file_path=r_config.data_path)
    bmp_info.should_draw_grey_map_background = True
    bmp_info.should_draw_basin_boundaries = False
    bmp_info.map_bg_color = "0.75"

    station_ids = [
        "104001", "093806", "093801", "081002", "081007", "080718"
    ]

    # get river network information used in the model
    flow_directions = analysis.get_array_from_file(r_config.data_path, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    accumulation_area_km2 = analysis.get_array_from_file(path=r_config.data_path,
                                                         var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    cell_manager = CellManager(flow_dirs=flow_directions,
                               lons2d=bmp_info.lons,
                               lats2d=bmp_info.lats, accumulation_area_km2=accumulation_area_km2)

    # Get the list of stations to indicate on the bias map
    stations = cehq_station.read_station_data(
        start_date=None, end_date=None, selected_ids=station_ids
    )
    """:type : list[Station]"""

    # Outlines of the areas upstream of the model points matched to the stations
    xx, yy = bmp_info.get_proj_xy()
    station_to_modelpoint = cell_manager.get_model_points_for_stations(station_list=stations)
    upstream_edges = cell_manager.get_upstream_polygons_for_points(
        model_point_list=station_to_modelpoint.values(), xx=xx, yy=yy)

    # Validate temperature and swe (precip is currently disabled)
    obs_path_anusplin = "/home/huziy/skynet3_rech1/anusplin_links"
    obs_path_swe = "data/swe_ross_brown/swe.nc"
    model_var_to_obs_path = OrderedDict([
        ("TT", obs_path_anusplin),
        # ("PR", obs_path_anusplin),
        ("I5", obs_path_swe)
    ])

    vname_to_obs_data = {}

    # parameters that won't change in the loop over variable names
    params_const = dict(rconfig=r_config, bmp_info=bmp_info, season_to_months=season_to_months)

    for vname, obs_path in model_var_to_obs_path.items():
        season_to_obs_data = get_seasonal_clim_obs_data(vname=vname, obs_path=obs_path, **params_const)

        # Mask the observed swe over water (inlands=True is passed to maskoceans),
        # since I5 is calculated only for land
        if vname in ["I5", ]:
            for season in season_to_obs_data:
                season_to_obs_data[season] = maskoceans(bmp_info.lons, bmp_info.lats,
                                                        season_to_obs_data[season],
                                                        inlands=True)

        vname_to_obs_data[vname] = season_to_obs_data

    # Plotting
    plot_all_vars_in_one_fig = True

    fig = None
    gs = None
    row_axes = []
    ncols = None
    if plot_all_vars_in_one_fig:
        plot_utils.apply_plot_params(font_size=12, width_pt=None, width_cm=25, height_cm=20)
        fig = plt.figure()
        ncols = len(season_to_months) + 1
        gs = GridSpec(len(model_var_to_obs_path), ncols, width_ratios=(ncols - 1) * [1., ] + [0.05, ])
    else:
        plot_utils.apply_plot_params(font_size=12, width_pt=None, width_cm=25, height_cm=25)

    # Projected station coordinates, computed once on the first panel and then reused
    station_x_list = []
    station_y_list = []
    for row, mname in enumerate(model_var_to_obs_path):

        if plot_all_vars_in_one_fig:
            row_axes = [fig.add_subplot(gs[row, col]) for col in range(ncols)]

        compare_vars(vname_model=mname, vname_to_obs=vname_to_obs_data,
                     r_config=r_config,
                     season_to_months=season_to_months,
                     bmp_info_agg=bmp_info,
                     axes_list=row_axes)

        # -1 in order to exclude the last (narrow) axes of the row
        for the_ax in row_axes[:-1]:

            # Need titles only for the first row
            if row > 0:
                the_ax.set_title("")

            draw_upstream_area_bounds(the_ax, upstream_edges)

            if len(station_x_list) == 0:
                for the_station in stations:
                    xst, yst = bmp_info.basemap(the_station.longitude, the_station.latitude)
                    station_x_list.append(xst)
                    station_y_list.append(yst)

            bmp_info.basemap.scatter(station_x_list, station_y_list, c="g", ax=the_ax, s=5, zorder=10, alpha=0.5)

        # Hide fall swe
        # NOTE(review): relies on the last season of season_to_months being the fall
        # one and on row_axes being populated (single-figure mode) - confirm.
        if mname in ["I5"]:
            row_axes[-2].set_visible(False)

    # Save the figure if necessary
    if plot_all_vars_in_one_fig:

        # make sure the output folder exists, otherwise opening the file below fails
        if not img_folder.is_dir():
            img_folder.mkdir(parents=True)

        fig_path = img_folder.joinpath("{}.png".format("_".join(model_var_to_obs_path)))
        with fig_path.open("wb") as figfile:
            fig.savefig(figfile, format="png", bbox_inches="tight")

        plt.close(fig)
# Example no. 7
# 0
def _first_field(table, query):
    """Return the "field" value of the first row of table matching query, or None if no row matches."""
    for row in table.where(query):
        return row["field"]
    return None


def get_mean_diffs(interflow_data_path="", base_data_path="",
                   start_year=1980, end_year=2010, months_of_interest=(4, 5, 6, 7, 8, 9),
                   delete_cache=True):
    """
    Accumulate the differences (interflow run minus base run) of surface runoff (TRAF),
    precipitation (PR), drainage (TDRA) and soil moisture (I1) over the selected time steps.

    Only the points where, in both simulations, the precipitation is above 0 and the
    temperature is above 0, and where the precipitation of the two runs differs by less
    than 1% of their mean, contribute at a given time step.

    NOTE: despite the name, the returned fields are sums over the time steps; they are
    not divided by the number of contributing steps.

    The result is pickled to a cache file in the current working directory, named after
    the period and the months of interest.

    :param interflow_data_path: path to the hdf file of the simulation with interflow
    :param base_data_path: path to the hdf file of the simulation without interflow
    :param start_year: first year of the period (inclusive)
    :param end_year: last year of the period (inclusive)
    :param months_of_interest: months (numbers) of the time steps to consider
    :param delete_cache: if True (default), an existing cache file is removed and the
        fields are recalculated
    :return: traf_diff, prcip_diff, drainage_diff, i1_diff (a tuple when calculated,
        the list stored in the cache file when loaded from the cache)
    """
    # Build the name of the cache file
    cache_file = "cache_extr_intf_effect{}-{}_{}.bin".format(start_year, end_year,
                                                             "-".join(str(m) for m in months_of_interest))

    # Do not use caching by default; there is nothing to remove on the first run
    if delete_cache and os.path.isfile(cache_file):
        os.remove(cache_file)

    if os.path.isfile(cache_file):
        # NOTE: pickle is only safe for trusted files; the cache is expected to be
        # produced by this function. Pickles have to be read in binary mode.
        with open(cache_file, "rb") as cache:
            return pickle.load(cache)

    precip_limit = 0.0  # at least it should rain
    tt_limit = 0  # and the soil should not be frozen

    traf_diff = None  # surface runoff difference
    prcip_diff = None
    drainage_diff = None  # drainage difference
    i1_diff = None  # soil moisture difference
    months_query = "{}".format("|".join(["(month=={})".format(m) for m in months_of_interest]))
    year_query = "(year >= {}) & (year <= {})".format(start_year, end_year)
    print("months_query = {}".format(months_query))

    # NOTE(review): the result is not used below; the read is kept in order to
    # preserve the original I/O behavior.
    depth_to_bedrock = pt_analysis.get_array_from_file(base_data_path, var_name=infovar.HDF_DEPTH_TO_BEDROCK_NAME)

    with tb.open_file(interflow_data_path) as h_intf:
        pr_intf_table = h_intf.get_node("/", "PR")
        tt_intf_table = h_intf.get_node("/", "TT")
        traf_intf_table = h_intf.get_node("/", "TRAF")
        tdra_intf_table = h_intf.get_node("/", "TDRA")
        i1_intf_table = h_intf.get_node("/", "I1")

        assert isinstance(pr_intf_table, tb.Table)
        assert isinstance(tt_intf_table, tb.Table)
        assert isinstance(traf_intf_table, tb.Table)
        assert isinstance(tdra_intf_table, tb.Table)

        print(len(pr_intf_table), len(tt_intf_table), len(traf_intf_table))

        with tb.open_file(base_data_path) as h_nointf:

            pr_nointf_table = h_nointf.get_node("/", "PR")
            tt_nointf_table = h_nointf.get_node("/", "TT")
            traf_nointf_table = h_nointf.get_node("/", "TRAF")
            tdra_nointf_table = h_nointf.get_node("/", "TDRA")
            i1_nointf_table = h_nointf.get_node("/", "I1")

            assert isinstance(pr_nointf_table, tb.Table)
            assert isinstance(tt_nointf_table, tb.Table)
            assert isinstance(traf_nointf_table, tb.Table)
            assert isinstance(tdra_nointf_table, tb.Table)

            # The precipitation rows of the interflow run drive the loop over time steps
            for rownum, pr_intf_row in enumerate(pr_intf_table.where("({}) & {}".format(months_query, year_query))):
                year, month, day, hour = [pr_intf_row[k] for k in ["year", "month", "day", "hour"]]

                pr_intf_field = pr_intf_row["field"]

                # Queries selecting the same time step in the other tables;
                # the tables with levels are read at level_index 0
                tt_query = "(year == {}) & (month == {}) & (day == {}) & (hour == {})".format(year, month, day, hour)
                traf_query = "{} & (level_index == {})".format(tt_query, 0)

                # for the simulation with interflow
                tt_intf_field = _first_field(tt_intf_table, tt_query)
                traf_intf_field = _first_field(traf_intf_table, traf_query)
                tdra_intf_field = _first_field(tdra_intf_table, traf_query)
                i1_intf_field = _first_field(i1_intf_table, traf_query)

                # for no interflow simulation
                tt_nointf_field = _first_field(tt_nointf_table, tt_query)
                pr_nointf_field = _first_field(pr_nointf_table, tt_query)
                traf_nointf_field = _first_field(traf_nointf_table, traf_query)
                tdra_nointf_field = _first_field(tdra_nointf_table, traf_query)
                i1_nointf_field = _first_field(i1_nointf_table, traf_query)

                # Allocate the accumulators once the shape of the fields is known
                if traf_diff is None:
                    traf_diff = np.zeros(pr_intf_field.shape)
                    prcip_diff = np.zeros(pr_intf_field.shape)
                    drainage_diff = np.zeros(pr_intf_field.shape)
                    i1_diff = np.zeros(pr_intf_field.shape)

                # It rains and it is warm in both simulations, and the precipitation
                # of the two runs differs by less than 1% of their mean
                points_of_interest = (
                    (pr_intf_field > precip_limit) & (pr_nointf_field > precip_limit) &
                    (tt_intf_field > tt_limit) & (tt_nointf_field > tt_limit)
                    & (abs(pr_intf_field - pr_nointf_field) < 0.01 * (pr_intf_field + pr_nointf_field) / 2.0)
                )

                if rownum % 100 == 0:
                    print("Precipitation ranges in M/s")
                    print(pr_intf_field.min(), pr_intf_field.max())
                    print(pr_nointf_field.min(), pr_nointf_field.max())

                # Report the query of a missing runoff field before the failure below
                if traf_intf_field is None:
                    print("intf field is none")
                    print(traf_query)

                if traf_nointf_field is None:
                    print("nointf field is none")
                    print(traf_query)

                traf_diff[points_of_interest] += traf_intf_field[points_of_interest] - \
                                                 traf_nointf_field[points_of_interest]

                prcip_diff[points_of_interest] += pr_intf_field[points_of_interest] - \
                                                  pr_nointf_field[points_of_interest]

                drainage_diff[points_of_interest] += tdra_intf_field[points_of_interest] - \
                                                     tdra_nointf_field[points_of_interest]

                i1_diff[points_of_interest] += i1_intf_field[points_of_interest] - \
                                               i1_nointf_field[points_of_interest]

    # Pickles have to be written in binary mode; the context manager closes the file
    with open(cache_file, "wb") as cache:
        pickle.dump([traf_diff, prcip_diff, drainage_diff, i1_diff], cache)
    return traf_diff, prcip_diff, drainage_diff, i1_diff
def compare(paths=None, path_to_control_data=None, control_label="",
            labels=None, varnames=None, levels=None, months_of_interest=None,
            start_year=None, end_year=None):
    """
    Comparing 2D fields
    :param paths: paths to the simulation results
    :param path_to_control_data: the path with which the comparison is done, i.e. X_0 in
     the plotted difference
            delta = X_i - X_0
    :param control_label: display name of the control simulation
    :param labels: Display name for each simulation (number of labels should
     be equal to the number of paths)
    :param varnames: names of the variables to compare
    :param levels: level for each variable (paired with varnames)
    :param months_of_interest: months (numbers) over which the fields are averaged
    :param start_year: first year of the period
    :param end_year: last year of the period

     generates one image file per variable (in images_folder):
        compare_<varname>_<control_label>_<label1>_..._<labeln>_months-<m1>-...-<mk>.jpeg

     The first row of the figure shows the mean fields, the second row shows the
     differences with the control; the result of the t-test (p-value < 0.1) between
     the yearly means is overlaid on the differences as hatching.
    """
    # get coordinate data  (assumes that all the variables and runs have the same coordinates)
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(file_path=path_to_control_data)
    x, y = basemap(lons2d, lats2d)

    lake_fraction = analysis.get_array_from_file(path=path_to_control_data, var_name="lake_fraction")

    # treat everything as land when the lake fraction is not available
    if lake_fraction is None:
        lake_fraction = np.zeros(lons2d.shape)

    ncolors = 10
    # +1 to include white
    diff_cmap = cm.get_cmap("RdBu_r", ncolors + 1)

    for var_name, level in zip(varnames, levels):
        sfmt = infovar.get_colorbar_formatter(var_name)
        control_means = analysis.get_mean_2d_fields_for_months(path=path_to_control_data, var_name=var_name,
                                                               months=months_of_interest,
                                                               start_year=start_year, end_year=end_year,
                                                               level=level)

        control_mean = np.mean(control_means, axis=0)
        fig = plt.figure()
        assert isinstance(fig, Figure)
        gs = gridspec.GridSpec(2, len(paths) + 1, wspace=0.5)

        # plot the control
        ax = fig.add_subplot(gs[0, 0])
        assert isinstance(ax, Axes)
        ax.set_title("{0}".format(control_label))
        ax.set_ylabel("Mean: $X_{0}$")
        to_plot = infovar.get_to_plot(var_name, control_mean,
                                      lake_fraction=lake_fraction, mask_oceans=True, lons=lons2d, lats=lats2d)
        # determine colorbar extent and spacing; shared by all the mean-field panels
        field_cmap, field_norm = infovar.get_colormap_and_norm_for(var_name, to_plot, ncolors=ncolors)

        basemap.pcolormesh(x, y, to_plot, cmap=field_cmap, norm=field_norm)
        cb = basemap.colorbar(format=sfmt)

        assert isinstance(cb, Colorbar)
        units = infovar.get_units(var_name)

        info = "Variable:" \
               "\n{0}" \
               "\nPeriod: {1}-{2}" \
               "\nMonths: {3}" \
               "\nUnits: {4}"

        info = info.format(infovar.get_long_name(var_name), start_year, end_year,
                           ",".join([datetime(2001, m, 1).strftime("%b") for m in months_of_interest]), units)

        ax.annotate(info, xy=(0.1, 0.3), xycoords="figure fraction")

        sel_axes = [ax]

        # Column 0 is taken by the control, the simulations start from column 1
        for column, (the_path, the_label) in enumerate(zip(paths, labels), start=1):

            # Read the same level as for the control, otherwise the difference
            # below could mix fields from different levels
            means_for_years = analysis.get_mean_2d_fields_for_months(path=the_path, var_name=var_name,
                                                                     months=months_of_interest,
                                                                     start_year=start_year, end_year=end_year,
                                                                     level=level)
            the_mean = np.mean(means_for_years, axis=0)

            # plot the mean value
            ax = fig.add_subplot(gs[0, column])
            sel_axes.append(ax)
            ax.set_title("{0}".format(the_label))
            to_plot = infovar.get_to_plot(var_name, the_mean, lake_fraction=lake_fraction,
                                          mask_oceans=True, lons=lons2d, lats=lats2d)

            basemap.pcolormesh(x, y, to_plot, cmap=field_cmap, norm=field_norm)
            ax.set_ylabel("Mean: $X_{0}$".format(column))
            cb = basemap.colorbar(format=sfmt)

            # plot the difference
            ax = fig.add_subplot(gs[1, column])
            sel_axes.append(ax)
            ax.set_ylabel("$X_{0} - X_0$".format(column))

            # Mask only if the previous plot (means) is masked
            thediff = the_mean - control_mean

            if hasattr(to_plot, "mask"):
                to_plot = np.ma.masked_where(to_plot.mask, thediff)
            else:
                to_plot = thediff

            if var_name == "PR":  # convert to mm/day
                to_plot = infovar.get_to_plot(var_name, to_plot, mask_oceans=False)

            # Color limits of the differences are symmetric with respect to 0
            vmin = np.ma.min(to_plot)
            vmax = np.ma.max(to_plot)

            d = max(abs(vmin), abs(vmax))
            vmin = -d
            vmax = d

            # NOTE: field_norm is rebound here to the norm of the differences; the
            # mean-field panels of the following columns reuse it, as in the original code.
            field_norm, bounds, vmn_nice, vmx_nice = infovar.get_boundary_norm(vmin, vmax, diff_cmap.N,
                                                                               exclude_zero=False)
            basemap.pcolormesh(x, y, to_plot, cmap=diff_cmap, norm=field_norm, vmin=vmn_nice, vmax=vmx_nice)

            cb = basemap.colorbar(format=sfmt)

            # Overlay the result of the t-test between the yearly means (p-value < 0.1)
            t, pval = ttest_ind(means_for_years, control_means, axis=0)
            sig = pval < 0.1
            basemap.contourf(x, y, sig.astype(int), nlevels=2, hatches=["+", None], colors="none")

        # plot coastlines
        for the_ax in sel_axes:
            basemap.drawcoastlines(ax=the_ax, linewidth=common_plot_params.COASTLINE_WIDTH)

        # depends on the compared simulations and the months of interest
        fig_file_name = "compare_{0}_{1}_{2}_months-{3}.jpeg".format(var_name, control_label,
                                                                     "_".join(labels),
                                                                     "-".join([str(m) for m in months_of_interest]))
        figpath = os.path.join(images_folder, fig_file_name)
        fig.savefig(figpath, dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight")
        plt.close(fig)
def plot_control_and_differences_in_one_panel_for_all_seasons_for_all_vars(
        varnames=None, levels=None,
        season_to_months=None,
        start_year=None,
        end_year=None):
    """
    Plot, in a single figure, the seasonal differences between a modified simulation
    and the control one: one row per variable and one column per season, followed by
    a narrow column reserved for the colorbar.

    The paths to the control and modified simulations, as well as their labels, are
    hard-coded in the body of the function. Variables that cannot be found in the
    files (NoSuchNodeError) are skipped. The figure is saved as a png file into
    <images_folder>/seasonal_mean_maps/<labels>_vs_<control_label>_for_<seasons>_<start_year>-<end_year>/

    :param varnames: list of the names of the variables to plot
    :param levels: list of level indices, one per variable (same length as varnames)
    :param season_to_months: mapping {season name: months of interest}, its iteration order
        defines the order of the columns
    :param start_year: passed as is to analysis.get_mean_2d_fields_for_months
    :param end_year: passed as is to analysis.get_mean_2d_fields_for_months
    """
    season_list = list(season_to_months.keys())

    pvalue_max = 0.1

    # crcm5-r vs crcm5-hcd-r
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-r_spinup.hdf"
    # control_label = "CRCM5-R"
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r_spinup2.hdf", ]
    # labels = ["CRCM5-HCD-R"]

    # crcm5-hcd-rl vs crcm5-hcd-r
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r_spinup2.hdf"
    # control_label = "CRCM5-HCD-R"
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf", ]
    # labels = ["CRCM5-HCD-RL"]

    # compare simulations with and without interflow
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf"
    # control_label = "CRCM5-HCD-RL"
    #
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_do_not_discard_small.hdf", ]
    # labels = ["CRCM5-HCD-RL-INTFL"]

    # very high hydr cond
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_do_not_discard_small.hdf"
    # control_label = "CRCM5-HCD-RL-INTFL"
    ##
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_sani-10000.hdf", ]
    # labels = ["CRCM5-HCD-RL-INTFL-sani=10000"]

    # Interflow effect
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf"
    # control_label = "CRCM5-HCD-RL"
    # ##
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ITFS.hdf5", ]
    # labels = ["ITFS"]

    # total lake effect
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-r.hdf5"
    # control_label = "CRCM5-NL"
    #
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5", ]
    # labels = ["CRCM5-L2", ]

    # lake effect (lake-atm interactions)
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-r.hdf5"
    # control_label = "CRCM5-R"
    #
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5", ]
    # labels = ["CRCM5-HCD-R", ]

    # lake effect (lake-river interactions)
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5"
    # control_label = "CRCM5-L1"
    #
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5", ]
    # labels = ["CRCM5-HCD-L2", ]

    # interflow effect ()
    control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5"
    control_label = "CRCM5-L2"

    paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS.hdf5", ]
    labels = ["CRCM5-L2I", ]

    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS_avoid_truncation1979-1989.hdf5", ]
    # labels = ["CRCM5-HCD-RL-INTFb", ]

    # interflow effect (avoid truncation and bigger slopes)
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS.hdf5"
    # control_label = "CRCM5-HCD-RL-INTF"
    #
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS_avoid_truncation1979-1989.hdf5", ]
    # labels = ["CRCM5-HCD-RL-INTF-improved", ]
    #

    row_labels = [r"{} vs {}".format(s, control_label) for s in labels]
    print(labels)

    # varnames = ["QQ", ]
    # levels = [None, ]

    assert len(levels) == len(varnames)

    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(file_path=control_path)
    x, y = basemap(lons2d, lats2d)

    lake_fraction = analysis.get_array_from_file(path=control_path, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    dpth_to_bedrock = analysis.get_array_from_file(path=control_path, var_name=infovar.HDF_DEPTH_TO_BEDROCK_NAME)

    assert dpth_to_bedrock is not None

    if lake_fraction is None:
        lake_fraction = np.zeros(lons2d.shape)

    # save the domain properties for reuse
    domain_props = DomainProperties()
    domain_props.basemap = basemap
    domain_props.lons2d = lons2d
    domain_props.lats2d = lats2d
    domain_props.x = x
    domain_props.y = y
    # domain_props.lake_fraction is read below when the fields are prepared for plotting,
    # so the lake fraction read above has to be stored here (it was left unassigned before)
    domain_props.lake_fraction = lake_fraction

    # All the variables share the same figure
    fig = plt.figure()
    assert isinstance(fig, Figure)

    ncols = len(season_list) + 1  # +1 is for the colorbar
    gs = gridspec.GridSpec(len(varnames), ncols, width_ratios=[1.0, ] * (ncols - 1) + [0.07], top=0.95)

    # Widths of the soil layers for each grid cell, the last axis goes over the layers
    lev_width_3d = np.ones(dpth_to_bedrock.shape + infovar.soil_layer_widths_26_to_60.shape)
    lev_width_3d *= infovar.soil_layer_widths_26_to_60[np.newaxis, np.newaxis, :]
    lev_bot_3d = lev_width_3d.cumsum(axis=2)

    correction = -lev_bot_3d + dpth_to_bedrock[:, :, np.newaxis]
    # Apply the correction only at points where the layer bottom is lower than
    # the bedrock
    below_bedrock = correction < 0
    lev_width_3d[below_bedrock] += correction[below_bedrock]
    lev_width_3d[lev_width_3d < 0] = 0

    # these variables are multiplied by the number of days in a season (mm/day -> mm)
    accumulated_vars = ("PR", "TRAF", "TDRA")

    # one row of the panel per variable
    for the_row, (var_name, level) in enumerate(zip(varnames, levels)):
        season_to_control_mean = {}
        label_to_season_to_difference = {}
        label_to_season_to_significance = {}

        try:
            # Calculate the difference for each season, and save the results to dictionaries
            # to access later when plotting
            for season, months_of_interest in season_to_months.items():
                print("working on season: {0}".format(season))

                control_means = analysis.get_mean_2d_fields_for_months(path=control_path, var_name=var_name,
                                                                       months=months_of_interest,
                                                                       start_year=start_year, end_year=end_year,
                                                                       level=level)

                control_mean = np.mean(control_means, axis=0)

                control_mean = infovar.get_to_plot(var_name, control_mean,
                                                   lake_fraction=domain_props.lake_fraction,
                                                   lons=lons2d, lats=lats2d, level_width_m=lev_width_3d[:, :, level])

                # multiply by the number of days in a season for PR and TRAF to convert them into mm from mm/day
                if var_name in accumulated_vars:
                    control_mean *= get_num_days(months_of_interest)
                    infovar.change_units_to(varnames=[var_name, ], new_units=r"${\rm mm}$")

                season_to_control_mean[season] = control_mean

                print("calculated mean from {0}".format(control_path))

                # calculate the difference for each simulation
                for the_path, the_label in zip(paths, row_labels):
                    modified_means = analysis.get_mean_2d_fields_for_months(path=the_path, var_name=var_name,
                                                                            months=months_of_interest,
                                                                            start_year=start_year, end_year=end_year,
                                                                            level=level)

                    # the test is done on the sets of fields returned for the two simulations
                    _, pval = ttest_ind(modified_means, control_means, axis=0, equal_var=False)
                    significance = ((pval <= pvalue_max) & (~control_mean.mask)).astype(int)
                    print("pval ranges: {} to {}".format(pval.min(), pval.max()))

                    modified_mean = np.mean(modified_means, axis=0)
                    modified_mean = infovar.get_to_plot(var_name, modified_mean,
                                                        lake_fraction=domain_props.lake_fraction, lons=lons2d,
                                                        lats=lats2d, level_width_m=lev_width_3d[:, :, level])

                    # multiply by the number of days in a season for PR and TRAF to convert them into mm from mm/day
                    if var_name in accumulated_vars:
                        modified_mean *= get_num_days(months_of_interest)

                    diff_vals = modified_mean - control_mean

                    print("diff ranges: min: {0};  max: {1}".format(diff_vals.min(), diff_vals.max()))
                    label_to_season_to_difference.setdefault(the_label, OrderedDict())[season] = diff_vals
                    label_to_season_to_significance.setdefault(the_label, OrderedDict())[season] = significance

                    print("Calculated mean and diff from {0}".format(the_path))
        except NoSuchNodeError:
            print("Could not find {0}, skipping...".format(var_name))
            continue

        for the_label, data in label_to_season_to_difference.items():
            axes = [fig.add_subplot(gs[the_row, col]) for col in range(ncols)]

            # Set season titles
            if the_row == 0:
                for the_season, ax in zip(season_list, axes):
                    ax.set_title(the_season)

            _plot_row(axes, data, the_label, var_name, increments=True, domain_props=domain_props,
                      season_list=season_list, significance=label_to_season_to_significance[the_label])

            var_label = infovar.get_long_display_label_for_var(var_name)
            if var_name in ["I1"]:
                var_label = "{}\n{} layer".format(var_label, ordinal(level + 1))

            axes[0].set_ylabel(var_label)

    fig.suptitle("({}) vs ({})".format(labels[0], control_label), font_properties=FontProperties(weight="bold"))
    folderpath = os.path.join(images_folder, "seasonal_mean_maps/{0}_vs_{1}_for_{2}_{3}-{4}".format(
        "_".join(labels), control_label, "-".join(season_list), start_year, end_year))

    # the folder is nested, so the intermediate folders might have to be created as well
    os.makedirs(folderpath, exist_ok=True)

    imname = "{0}_{1}.png".format("-".join(varnames), "_".join(labels + [control_label]))
    impath = os.path.join(folderpath, imname)
    fig.savefig(impath, bbox_inches="tight")
    # release the figure, otherwise it stays registered in pyplot after the function returns
    plt.close(fig)
def draw_model_comparison(model_points=None, stations=None, sim_name_to_file_name=None, hdf_folder=None,
                          start_year=None, end_year=None, cell_manager=None, stfl_name="STFA",
                          drainage_area_reldiff_min=0.1, plot_upstream_area_averaged=True,
                          sim_name_to_color=None):
    """
    Compare daily climatologies of the modelled streamflow from several simulations (and of the
    observed one, when stations are given) at a set of points. One figure per point is created
    and a summary figure containing only the streamflow panels for all the points is saved to
    images_folder. Three text files (scores_*, corr_*, flow_*) are created in the current
    working directory.

    :param model_points: list of model point objects
    :param stations: list of stations corresponding to the list of model points
        len(model_points) == len(stations) if stations is not None.
        if stations is None - then no measured streamflow will be plotted
    :param sim_name_to_file_name: mapping {simulation label: hdf file name}, the fields describing
        the domain (flow directions, lake fraction, cell areas, ...) are read from the first file
    :param hdf_folder: folder containing the hdf files, if None the file names are used as they are
    :param start_year: start of the period of interest
    :param end_year: end of the period of interest
    :param cell_manager: is a CellManager instance which can be provided for better performance if necessary
    :param stfl_name: name of the streamflow variable in the hdf files
    :param drainage_area_reldiff_min: passed to cell_manager.get_model_points_for_stations
        (as drainaige_area_reldiff_limit) when the stations are given
    :param plot_upstream_area_averaged: if True, the panels with the upstream temperature, precipitation,
        runoff and swe are added to the figure of each point, and the figure is saved; otherwise the
        figures of the separate points are not saved
    :param sim_name_to_color: optional mapping {simulation label: color} for the streamflow-only
        figure, should also contain the "Obs." key when the stations are given
    """
    assert model_points is None or stations is None or len(stations) == len(model_points)
    label_list = list(sim_name_to_file_name.keys())  # Needed to keep the order the same for all subplots

    def _path_to(file_name):
        # hdf_folder is optional, the same rule is applied to all the files
        return file_name if hdf_folder is None else os.path.join(hdf_folder, file_name)

    path0 = _path_to(sim_name_to_file_name[label_list[0]])
    flow_directions = analysis.get_array_from_file(path=path0, var_name="flow_direction")
    lake_fraction = analysis.get_array_from_file(path=path0, var_name="lake_fraction")

    # mask lake fraction in the ocean
    lake_fraction = np.ma.masked_where((flow_directions <= 0) | (flow_directions > 128), lake_fraction)

    accumulation_area_km2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    area_m2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_CELL_AREA_NAME_M2)

    # Try to read cell areas im meters if it is not Ok then try in km2
    if area_m2 is not None:
        cell_area_km2 = area_m2 * 1.0e-6
    else:
        cell_area_km2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_CELL_AREA_NAME_KM2)

    print("cell area ranges from {} to {}".format(cell_area_km2.min(), cell_area_km2.max()))

    # print "plotting from {0}".format(path0)
    # plt.pcolormesh(lake_fraction.transpose())
    # plt.colorbar()
    # plt.show()
    # exit()

    # write the following columns to the scores file
    header_format = "{0:10s}\t{1:10s}\t{2:10s}\t" + "\t".join(["{" + str(i + 3) + ":10s}"
                                                               for i in range(len(label_list))])
    header_ns = ("ID", "DAo", "DAm",) + tuple(["NS({0})".format(key) for key in label_list])

    header_qyear = ("ID", "DAo", "DAm",) + tuple(["Qyear({0})".format(key) for key in label_list]) + \
                   ("Qyear(obs)",)
    header_format_qyear = header_format + "\t{" + str(len(label_list) + 3) + ":10s}"

    report_name_format = "{0}_{1}_{2}-{3}.txt"
    labels_joined = "_".join(label_list)

    # The information text files are closed by the with statement, even if something fails below.
    # Nothing is written to the correlations file here, it is only created.
    with open(report_name_format.format("scores", labels_joined, start_year, end_year), "w") as file_scores, \
            open(report_name_format.format("corr", labels_joined, start_year, end_year), "w"), \
            open(report_name_format.format("flow", labels_joined, start_year, end_year), "w") as file_annual_discharge:

        file_scores.write(header_format.format(*header_ns) + "\n")
        file_annual_discharge.write(header_format_qyear.format(*header_qyear) + "\n")

        lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(file_path=path0)

        # Create a cell manager if it is not provided
        if cell_manager is None:
            cell_manager = CellManager(flow_directions, accumulation_area_km2=accumulation_area_km2,
                                       lons2d=lons2d, lats2d=lats2d)

        if stations is not None:
            # Get the list of the corresponding model points
            station_to_modelpoint = cell_manager.get_model_points_for_stations(
                station_list=stations,
                lake_fraction=lake_fraction,
                drainaige_area_reldiff_limit=drainage_area_reldiff_min)

            # sort so that the northernmost stations appear uppermost
            station_list = sorted(station_to_modelpoint, key=lambda st1: st1.latitude, reverse=True)
            mp_list = [station_to_modelpoint[st] for st in station_list]
        else:
            mp_list = model_points
            station_list = None
            # sort so that the northernmost points appear uppermost
            mp_list.sort(key=lambda mpt: mpt.latitude, reverse=True)

        # set ids to the model points so they can be distinguished easier
        model_point.set_model_point_ids(mp_list)

        # ###Uncomment the lines below for the validation plot in paper 2
        # brewer2mpl.get_map args: set name  set type  number of colors
        # bmap = brewer2mpl.get_map("Set1", "qualitative", 9)
        # Change the default colors
        # mpl.rcParams["axes.color_cycle"] = bmap.mpl_colors

        # For the streamflow only plot
        ncols = 3
        nrows = max(len(mp_list) // ncols, 1)
        if ncols * nrows < len(mp_list):
            nrows += 1

        figure_stfl = plt.figure(figsize=(4 * ncols, 3 * nrows))
        gs_stfl = gridspec.GridSpec(nrows=nrows, ncols=ncols)
        # a flag which signifies if a legend should be added to the plot,
        # it is needed so we have only one legend per plot
        legend_added = False

        ax_stfl = None
        all_years = list(range(start_year, end_year + 1))

        if station_list is not None:
            processed_stations = station_list
        else:
            processed_stations = [None] * len(mp_list)
        processed_model_points = mp_list
        plot_point_positions_with_upstream_areas(processed_stations, processed_model_points, basemap,
                                                 cell_manager, lake_fraction_field=lake_fraction)

        if plot_upstream_area_averaged:
            # create obs data managers
            anusplin_tmin = AnuSplinManager(variable="stmn")
            anusplin_tmax = AnuSplinManager(variable="stmx")
            anusplin_pcp = AnuSplinManager(variable="pcp")

            daily_dates, obs_tmin_fields = anusplin_tmin.get_daily_clim_fields_interpolated_to(
                start_year=start_year, end_year=end_year,
                lons_target=lons2d, lats_target=lats2d)

            _, obs_tmax_fields = anusplin_tmax.get_daily_clim_fields_interpolated_to(
                start_year=start_year, end_year=end_year,
                lons_target=lons2d, lats_target=lats2d)

            _, obs_pcp_fields = anusplin_pcp.get_daily_clim_fields_interpolated_to(
                start_year=start_year, end_year=end_year,
                lons_target=lons2d, lats_target=lats2d)

            swe_path = "/skynet3_rech1/huziy/swe_ross_brown/swe.nc4"
            if not os.path.isfile(os.path.realpath(swe_path)):
                raise IOError("SWE-obs file {} does not exist".format(swe_path))

            swe_manager = SweDataManager(path=swe_path, var_name="SWE")
            obs_swe_daily_clim = swe_manager.get_daily_climatology(start_year, end_year)
            interpolated_obs_swe_clim = swe_manager.interpolate_daily_climatology_to(obs_swe_daily_clim,
                                                                                     lons2d_target=lons2d,
                                                                                     lats2d_target=lats2d)
        values_obs = None

        # Daily climatological fields of each simulation: they depend only on the simulation file
        # and on the period, so they are read once (for the first point which needs them)
        # and reused for all the other points
        model_daily_temp_clim = {}
        model_daily_precip_clim = {}
        model_daily_clim_surf_runoff = {}
        model_daily_clim_subsurf_runoff = {}
        model_daily_clim_swe = {}

        for i, the_model_point in enumerate(mp_list):

            ax_stfl = figure_stfl.add_subplot(gs_stfl[i // ncols, i % ncols], sharex=ax_stfl)

            assert isinstance(the_model_point, ModelPoint)

            # Check the number of years accessible for the station if the list of stations is given
            the_station = None if station_list is None else station_list[i]
            if the_station is not None:
                assert isinstance(the_station, Station)
                year_list = [yi for yi in the_station.get_list_of_complete_years()
                             if start_year <= yi <= end_year]

                if len(year_list) < 1:
                    continue
            else:
                year_list = all_years

            fig = plt.figure(figsize=(12, 15))

            gs = gridspec.GridSpec(4, 4, wspace=1)

            # plot station position
            ax = fig.add_subplot(gs[3, 0:2])
            upstream_mask = _plot_station_position(ax, the_station, basemap, cell_manager, the_model_point)

            # plot streamflows
            ax = fig.add_subplot(gs[0:2, 0:2])

            dates = None

            # get model data for the list of years
            simlabel_to_vals = {}
            for label in label_list:
                fpath = _path_to(sim_name_to_file_name[label])

                if plot_upstream_area_averaged and label not in model_daily_temp_clim:
                    # read temperature data and calculate daily climatologic fileds
                    _, model_daily_temp_clim[label] = analysis.get_daily_climatology(
                        path_to_hdf_file=fpath, var_name="TT", level=0, start_year=start_year, end_year=end_year)

                    # read modelled precip and calculate daily climatologic fields
                    _, model_daily_precip_clim[label] = analysis.get_daily_climatology(
                        path_to_hdf_file=fpath, var_name="PR", level=0, start_year=start_year, end_year=end_year)

                    # read modelled surface runoff and calculate daily climatologic fields
                    _, model_daily_clim_surf_runoff[label] = analysis.get_daily_climatology(
                        path_to_hdf_file=fpath, var_name="TRAF", level=0, start_year=start_year, end_year=end_year)

                    # read modelled subsurface runoff and calculate daily climatologic fields
                    _, model_daily_clim_subsurf_runoff[label] = analysis.get_daily_climatology(
                        path_to_hdf_file=fpath, var_name="TDRA", level=0, start_year=start_year, end_year=end_year)

                    # read modelled swe and calculate daily climatologic fields
                    _, model_daily_clim_swe[label] = analysis.get_daily_climatology(
                        path_to_hdf_file=fpath, var_name="I5", level=0, start_year=start_year, end_year=end_year)

                dates, values_model = analysis.get_daily_climatology_for_a_point(path=fpath,
                                                                                 var_name=stfl_name,
                                                                                 years_of_interest=year_list,
                                                                                 i_index=the_model_point.ix,
                                                                                 j_index=the_model_point.jy)

                ax.plot(dates, values_model, label=label, lw=2)

                if sim_name_to_color is None:
                    ax_stfl.plot(dates, values_model, label=label, lw=2)
                else:
                    ax_stfl.plot(dates, values_model, sim_name_to_color[label], label=label, lw=2)

                    print(20 * "!!!")
                    print("{} -> {}".format(label, sim_name_to_color[label]))
                    print(20 * "!!!")

                simlabel_to_vals[label] = values_model

            if the_station is not None:
                dates, values_obs = the_station.get_daily_climatology_for_complete_years_with_pandas(
                    stamp_dates=dates, years=year_list)

                # To keep the colors consistent for all the variables, the obs Should be plotted last
                ax.plot(dates, values_obs, label="Obs.", lw=2)
                # no ticklabels for streamflow plot
                plt.setp(ax.get_xticklabels(), visible=False)

                if sim_name_to_color is None:
                    ax_stfl.plot(dates, values_obs, label="Obs.", lw=2)
                else:
                    ax_stfl.plot(dates, values_obs, label="Obs.", lw=2, color=sim_name_to_color["Obs."])

                # Print excesss from streamflow validation
                for label, values_model in simlabel_to_vals.items():
                    calclulate_spring_peak_err(dates, values_obs, values_model,
                                               st_id="{}: {}".format(label, the_station.id),
                                               da_mod=the_model_point.accumulation_area,
                                               da_obs=the_station.drainage_km2)

            ax.set_ylabel(r"Streamflow: ${\rm m^3/s}$")
            assert isinstance(ax, Axes)
            assert isinstance(fig, Figure)

            # Put some information about the point
            if the_station is not None:
                point_info = "{0}".format(the_station.id)
                write_annual_flows_to_txt(label_list, simlabel_to_vals, values_obs, file_annual_discharge,
                                          station_id=the_station.id,
                                          da_obs=the_station.drainage_km2, da_mod=the_model_point.accumulation_area)

            else:
                point_info = "{0}".format(the_model_point.point_id)

            ax.annotate(point_info, (0.8, 0.8), xycoords="axes fraction",
                        bbox=dict(facecolor="white", alpha=0.5),
                        va="top", ha="right")

            ax.legend(loc=(0.0, 1.05), borderaxespad=0, ncol=3)
            ax.xaxis.set_minor_formatter(FuncFormatter(lambda x, pos: num2date(x).strftime("%b")[0]))
            ax.xaxis.set_minor_locator(MonthLocator(bymonthday=15))
            ax.xaxis.set_major_locator(MonthLocator())

            ax.grid()

            streamflow_axes = ax  # save streamflow axes for later use

            if not legend_added:
                ax_stfl.legend(loc="lower left", bbox_to_anchor=(0, 1.15), borderaxespad=0, ncol=3)
                ax_stfl.xaxis.set_minor_formatter(FuncFormatter(lambda x, pos: num2date(x).strftime("%b")[0]))
                ax_stfl.xaxis.set_minor_locator(MonthLocator(bymonthday=15))
                ax_stfl.xaxis.set_major_locator(MonthLocator())

                ax_stfl.set_ylabel(r"Streamflow ${\rm m^3/s}$")
                legend_added = True

            plt.setp(ax_stfl.get_xmajorticklabels(), visible=False)
            ax_stfl.yaxis.set_major_locator(MaxNLocator(nbins=5))
            sfmt = ScalarFormatter(useMathText=True)
            sfmt.set_powerlimits((-2, 2))
            ax_stfl.yaxis.set_major_formatter(sfmt)
            ax_stfl.grid()

            # annotate streamflow-only panel plot
            ax_stfl.annotate(point_info, (0.05, 0.95), xycoords="axes fraction",
                             bbox=dict(facecolor="white"),
                             va="top", ha="left")

            if plot_upstream_area_averaged:
                # plot temperature comparisons (tmod - daily with anusplin tmin and tmax)
                ax = fig.add_subplot(gs[3, 2:], sharex=streamflow_axes)
                _validate_temperature_with_anusplin(ax, the_model_point, cell_area_km2=cell_area_km2,
                                                    upstream_mask=upstream_mask,
                                                    daily_dates=daily_dates,
                                                    obs_tmin_clim_fields=obs_tmin_fields,
                                                    obs_tmax_clim_fields=obs_tmax_fields,
                                                    model_data_dict=model_daily_temp_clim,
                                                    simlabel_list=label_list)

                # plot precipitation comparisons (with anusplin)
                ax = fig.add_subplot(gs[2, 2:], sharex=streamflow_axes)
                _validate_precip_with_anusplin(ax, the_model_point, cell_area_km2=cell_area_km2,
                                               upstream_mask=upstream_mask,
                                               daily_dates=daily_dates,
                                               obs_precip_clim_fields=obs_pcp_fields,
                                               model_data_dict=model_daily_precip_clim,
                                               simlabel_list=label_list)

                # plot mean upstream surface runoff
                ax = fig.add_subplot(gs[0, 2:], sharex=streamflow_axes)
                _plot_upstream_surface_runoff(ax, the_model_point, cell_area_km2=cell_area_km2,
                                              upstream_mask=upstream_mask,
                                              daily_dates=daily_dates,
                                              model_data_dict=model_daily_clim_surf_runoff,
                                              simlabel_list=label_list)

                # plot mean upstream subsurface runoff
                ax = fig.add_subplot(gs[1, 2:], sharex=streamflow_axes, sharey=ax)
                _plot_upstream_subsurface_runoff(ax, the_model_point, cell_area_km2=cell_area_km2,
                                                 upstream_mask=upstream_mask,
                                                 daily_dates=daily_dates,
                                                 model_data_dict=model_daily_clim_subsurf_runoff,
                                                 simlabel_list=label_list)

                # plot mean upstream swe comparison
                ax = fig.add_subplot(gs[2, 0:2], sharex=streamflow_axes)
                # point_info is the station id when the stations are given and the model point id
                # otherwise (there is no station object to take the id from in the latter case)
                print("Validating SWE for ", point_info, "--" * 20)
                _validate_swe_with_ross_brown(ax, the_model_point, cell_area_km2=cell_area_km2,
                                              upstream_mask=upstream_mask,
                                              daily_dates=daily_dates,
                                              model_data_dict=model_daily_clim_swe,
                                              obs_swe_clim_fields=interpolated_obs_swe_clim,
                                              simlabel_list=label_list)

            if the_station is not None:
                im_name = "comp_point_with_obs_{0}_{1}_{2}.png".format(the_station.id,
                                                                       the_station.source,
                                                                       "_".join(label_list))
                im_folder_path = os.path.join(images_folder, the_station.source)
            else:
                im_name = "comp_point_with_obs_{0}_{1}.png".format(the_model_point.point_id,
                                                                   "_".join(label_list))
                im_folder_path = os.path.join(images_folder, "outlets_point_comp")

            # create a folder for a given source of observed streamflow if it does not exist yet
            # (including the parent folders)
            os.makedirs(im_folder_path, exist_ok=True)

            im_path = os.path.join(im_folder_path, im_name)

            if plot_upstream_area_averaged:
                fig.savefig(im_path, dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight", transparent=True)

            plt.close(fig)

            # return  # temporary plot only one point

        assert isinstance(figure_stfl, Figure)
        figure_stfl.tight_layout()
        figure_stfl.savefig(os.path.join(images_folder,
                                         "comp_point_with_obs_{0}.png".format("_".join(label_list))),
                            bbox_inches="tight", transparent=True, dpi=cpp.FIG_SAVE_DPI)
        plt.close(figure_stfl)
# Example no. 11
# 0
def compare(paths=None,
            path_to_control_data=None,
            control_label="",
            labels=None,
            varnames=None,
            levels=None,
            months_of_interest=None,
            start_year=None,
            end_year=None):
    """
    Compare mean 2D fields of several simulations with a control simulation.

    One figure is produced per variable. The top row shows the mean field of
    the control (first column) followed by the mean field of each simulation;
    the bottom row shows, for each simulation, the absolute difference

            delta = x - a

    where ``a`` is the control mean and ``x`` is the simulation mean. Points
    where a t-test between the two samples returned by
    ``analysis.get_mean_2d_fields_for_months`` gives p < 0.1 are hatched.

    :param paths: paths to the simulation results
    :param path_to_control_data: path to the control data (``a`` above); the
     coordinates and the lake fraction are read from this file as well
    :param control_label: display name of the control simulation
    :param labels: display name for each simulation (number of labels should
     be equal to the number of paths)
    :param varnames: names of the variables to compare
    :param levels: level for each variable (paired with varnames)
    :param months_of_interest: month numbers (1-12) used for the means
    :param start_year: start of the period passed to the analysis helper
    :param end_year: end of the period passed to the analysis helper

    Generates one image file per variable in ``images_folder``:
        compare_<varname>_<control_label>_<label1>_..._<labeln>_months-<m1>-...-<mk>.jpeg
    """
    # get coordinate data  (assumes that all the variables and runs have the same coordinates)
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(
        file_path=path_to_control_data)
    x, y = basemap(lons2d, lats2d)

    lake_fraction = analysis.get_array_from_file(path=path_to_control_data,
                                                 var_name="lake_fraction")

    if lake_fraction is None:
        lake_fraction = np.zeros(lons2d.shape)

    ncolors = 10
    # +1 to include white
    diff_cmap = cm.get_cmap("RdBu_r", ncolors + 1)

    for var_name, level in zip(varnames, levels):
        sfmt = infovar.get_colorbar_formatter(var_name)
        control_means = analysis.get_mean_2d_fields_for_months(
            path=path_to_control_data,
            var_name=var_name,
            months=months_of_interest,
            start_year=start_year,
            end_year=end_year,
            level=level)

        control_mean = np.mean(control_means, axis=0)
        fig = plt.figure()
        assert isinstance(fig, Figure)
        gs = gridspec.GridSpec(2, len(paths) + 1, wspace=0.5)

        # plot the control
        ax = fig.add_subplot(gs[0, 0])
        assert isinstance(ax, Axes)
        ax.set_title("{0}".format(control_label))
        ax.set_ylabel("Mean: $X_{0}$")
        to_plot = infovar.get_to_plot(var_name,
                                      control_mean,
                                      lake_fraction=lake_fraction,
                                      mask_oceans=True,
                                      lons=lons2d,
                                      lats=lats2d)
        # determine colorbar extent and spacing; these are shared by ALL the
        # mean panels of this variable and must not be reassigned below
        field_cmap, field_norm = infovar.get_colormap_and_norm_for(
            var_name, to_plot, ncolors=ncolors)

        basemap.pcolormesh(x, y, to_plot, cmap=field_cmap, norm=field_norm)
        cb = basemap.colorbar(format=sfmt)

        assert isinstance(cb, Colorbar)
        units = infovar.get_units(var_name)

        info = "Variable:" \
               "\n{0}" \
               "\nPeriod: {1}-{2}" \
               "\nMonths: {3}" \
               "\nUnits: {4}"

        # The year 2001 is arbitrary: it is only used to get month abbreviations
        month_names = ",".join(
            datetime(2001, m, 1).strftime("%b") for m in months_of_interest)
        info = info.format(infovar.get_long_name(var_name), start_year,
                           end_year, month_names, units)

        ax.annotate(info, xy=(0.1, 0.3), xycoords="figure fraction")

        sel_axes = [ax]

        for column, (the_path, the_label) in enumerate(zip(paths, labels),
                                                       start=1):

            # Read the simulation at the same level as the control, otherwise
            # the difference below would mix different levels
            means_for_years = analysis.get_mean_2d_fields_for_months(
                path=the_path,
                var_name=var_name,
                months=months_of_interest,
                start_year=start_year,
                end_year=end_year,
                level=level)
            the_mean = np.mean(means_for_years, axis=0)

            # plot the mean value
            ax = fig.add_subplot(gs[0, column])
            sel_axes.append(ax)
            ax.set_title("{0}".format(the_label))
            to_plot = infovar.get_to_plot(var_name,
                                          the_mean,
                                          lake_fraction=lake_fraction,
                                          mask_oceans=True,
                                          lons=lons2d,
                                          lats=lats2d)

            basemap.pcolormesh(x, y, to_plot, cmap=field_cmap, norm=field_norm)
            ax.set_ylabel("Mean: $X_{0}$".format(column))
            basemap.colorbar(format=sfmt)

            # plot the difference
            ax = fig.add_subplot(gs[1, column])
            sel_axes.append(ax)
            ax.set_ylabel("$X_{0} - X_0$".format(column))

            # Mask only if the previous plot (means) is masked
            thediff = the_mean - control_mean

            if hasattr(to_plot, "mask"):
                to_plot = np.ma.masked_where(to_plot.mask, thediff)
            else:
                to_plot = thediff

            if var_name == "PR":  # convert to mm/day
                to_plot = infovar.get_to_plot(var_name,
                                              to_plot,
                                              mask_oceans=False)

            # Symmetric range around zero so that white corresponds to "no change"
            d = max(abs(np.ma.min(to_plot)), abs(np.ma.max(to_plot)))

            # Use a separate name for the difference norm: reusing field_norm
            # here would make the mean panels of the following simulations use
            # the difference norm.
            diff_norm, _bounds, _vmn_nice, _vmx_nice = infovar.get_boundary_norm(
                -d, d, diff_cmap.N, exclude_zero=False)

            # norm and vmin/vmax cannot be passed together in recent matplotlib
            # versions, the limits are carried by the norm itself
            basemap.pcolormesh(x, y, to_plot, cmap=diff_cmap, norm=diff_norm)

            basemap.colorbar(format=sfmt)

            # Hatch the points where the difference is significant
            t, pval = ttest_ind(means_for_years, control_means, axis=0)
            sig = pval < 0.1
            basemap.contourf(x,
                             y,
                             sig.astype(int),
                             nlevels=2,
                             hatches=["+", None],
                             colors="none")

        # plot coastlines
        for the_ax in sel_axes:
            basemap.drawcoastlines(
                ax=the_ax, linewidth=common_plot_params.COASTLINE_WIDTH)

        # depends on the compared simulations and the months of interest
        fig_file_name = "compare_{0}_{1}_{2}_months-{3}.jpeg".format(
            var_name, control_label, "_".join(labels),
            "-".join([str(m) for m in months_of_interest]))
        figpath = os.path.join(images_folder, fig_file_name)
        fig.savefig(figpath, dpi=cpp.FIG_SAVE_DPI, bbox_inches="tight")
        plt.close(fig)
Ejemplo n.º 12
0
def plot_control_and_differences_in_one_panel_for_all_seasons_for_all_vars(
        varnames=None,
        levels=None,
        season_to_months=None,
        start_year=None,
        end_year=None):
    """
    Plot seasonal differences (modified - control) for several variables in one figure.

    The figure has one row per variable and one column per season, plus a
    narrow last column reserved for the colorbar. For each season the
    significance of the difference is evaluated with Welch's t-test
    (``equal_var=False``) at the ``pvalue_max`` level and passed to
    ``_plot_row``. Variables that are missing in the input files
    (``NoSuchNodeError``) are skipped.

    The control and modified simulations are hard-coded below (the alternative
    configurations are kept as comments).

    :param varnames: names of the variables to plot (one row each)
    :param levels: level for each variable (must have the same length as varnames)
    :param season_to_months: mapping {season name: list of months}
    :param start_year: start of the period passed to the analysis helper
    :param end_year: end of the period passed to the analysis helper

    The image is saved to
        <images_folder>/seasonal_mean_maps/<labels>_vs_<control_label>_for_<seasons>_<start_year>-<end_year>/
    """
    season_list = list(season_to_months.keys())

    pvalue_max = 0.1

    # crcm5-r vs crcm5-hcd-r
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-r_spinup.hdf"
    # control_label = "CRCM5-R"
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r_spinup2.hdf", ]
    # labels = ["CRCM5-HCD-R"]

    # crcm5-hcd-rl vs crcm5-hcd-r
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r_spinup2.hdf"
    # control_label = "CRCM5-HCD-R"
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf", ]
    # labels = ["CRCM5-HCD-RL"]

    # compare simulations with and without interflow
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf"
    # control_label = "CRCM5-HCD-RL"
    #
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_do_not_discard_small.hdf", ]
    # labels = ["CRCM5-HCD-RL-INTFL"]

    # very high hydr cond
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_do_not_discard_small.hdf"
    # control_label = "CRCM5-HCD-RL-INTFL"
    ##
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_sani-10000.hdf", ]
    # labels = ["CRCM5-HCD-RL-INTFL-sani=10000"]

    # Interflow effect
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl_spinup.hdf"
    # control_label = "CRCM5-HCD-RL"
    # ##
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_spinup_ITFS.hdf5", ]
    # labels = ["ITFS"]

    # total lake effect
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-r.hdf5"
    # control_label = "CRCM5-NL"
    #
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5", ]
    # labels = ["CRCM5-L2", ]

    # lake effect (lake-atm interactions)
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-r.hdf5"
    # control_label = "CRCM5-R"
    #
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5", ]
    # labels = ["CRCM5-HCD-R", ]

    # lake effect (lake-river interactions)
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-r.hdf5"
    # control_label = "CRCM5-L1"
    #
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5", ]
    # labels = ["CRCM5-HCD-L2", ]

    # interflow effect ()
    control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5"
    control_label = "CRCM5-L2"

    paths = [
        "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS.hdf5",
    ]
    labels = [
        "CRCM5-L2I",
    ]

    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS_avoid_truncation1979-1989.hdf5", ]
    # labels = ["CRCM5-HCD-RL-INTFb", ]

    # interflow effect (avoid truncation and bigger slopes)
    # control_path = "/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS.hdf5"
    # control_label = "CRCM5-HCD-RL-INTF"
    #
    # paths = ["/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl-intfl_ITFS_avoid_truncation1979-1989.hdf5", ]
    # labels = ["CRCM5-HCD-RL-INTF-improved", ]
    #

    row_labels = [r"{} vs {}".format(s, control_label) for s in labels]
    print(labels)

    # varnames = ["QQ", ]
    # levels = [None, ]

    assert len(levels) == len(varnames)

    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(
        file_path=control_path)
    x, y = basemap(lons2d, lats2d)
    # save the domain properties for reuse
    domain_props = DomainProperties()
    domain_props.basemap = basemap
    domain_props.lons2d = lons2d
    domain_props.lats2d = lats2d
    domain_props.x = x
    domain_props.y = y

    lake_fraction = analysis.get_array_from_file(
        path=control_path, var_name=infovar.HDF_LAKE_FRACTION_NAME)
    dpth_to_bedrock = analysis.get_array_from_file(
        path=control_path, var_name=infovar.HDF_DEPTH_TO_BEDROCK_NAME)

    assert dpth_to_bedrock is not None

    if lake_fraction is None:
        lake_fraction = np.zeros(lons2d.shape)

    # The lake fraction is accessed below through domain_props, so it has to be
    # stored there (unless DomainProperties already provides a value).
    if getattr(domain_props, "lake_fraction", None) is None:
        domain_props.lake_fraction = lake_fraction

    # Do the plotting for each variable
    fig = plt.figure()
    assert isinstance(fig, Figure)

    # plot the control data
    ncols = len(season_list) + 1  # +1 is for the colorbar
    gs = gridspec.GridSpec(len(varnames),
                           ncols,
                           width_ratios=[1.0, ] * (ncols - 1) + [0.07],
                           top=0.95)

    # Widths of the soil layers at each grid point: start from the nominal
    # widths and truncate the layers at the depth to bedrock
    lev_width_3d = np.ones(dpth_to_bedrock.shape +
                           infovar.soil_layer_widths_26_to_60.shape)
    lev_width_3d *= infovar.soil_layer_widths_26_to_60[np.newaxis,
                                                       np.newaxis, :]
    lev_bot_3d = lev_width_3d.cumsum(axis=2)

    correction = -lev_bot_3d + dpth_to_bedrock[:, :, np.newaxis]
    # Apply the correction only at points where the layer bottom is lower than
    # the bedrock
    below_bedrock = correction < 0
    lev_width_3d[below_bedrock] += correction[below_bedrock]
    lev_width_3d[lev_width_3d < 0] = 0

    # plot the plots one row per variable
    for the_row, (var_name, level) in enumerate(zip(varnames, levels)):
        label_to_season_to_difference = {}
        label_to_season_to_significance = {}

        # PR, TRAF and TDRA are converted from mm/day to mm accumulated over the season
        convert_to_seasonal_total = var_name in ["PR", "TRAF", "TDRA"]

        try:
            # Calculate the difference for each season, and save the results to dictionaries
            # to access later when plotting
            for season, months_of_interest in season_to_months.items():
                print("working on season: {0}".format(season))

                control_means = analysis.get_mean_2d_fields_for_months(
                    path=control_path,
                    var_name=var_name,
                    months=months_of_interest,
                    start_year=start_year,
                    end_year=end_year,
                    level=level)

                control_mean = np.mean(control_means, axis=0)

                control_mean = infovar.get_to_plot(
                    var_name,
                    control_mean,
                    lake_fraction=domain_props.lake_fraction,
                    lons=lons2d,
                    lats=lats2d,
                    level_width_m=lev_width_3d[:, :, level])

                # multiply by the number of days in a season for PR and TRAF to convert them into mm from mm/day
                if convert_to_seasonal_total:
                    control_mean *= get_num_days(months_of_interest)
                    infovar.change_units_to(varnames=[var_name, ],
                                            new_units=r"${\rm mm}$")

                print("calculated mean from {0}".format(control_path))

                # calculate the difference for each simulation
                for the_path, the_label in zip(paths, row_labels):
                    modified_means = analysis.get_mean_2d_fields_for_months(
                        path=the_path,
                        var_name=var_name,
                        months=months_of_interest,
                        start_year=start_year,
                        end_year=end_year,
                        level=level)

                    tval, pval = ttest_ind(modified_means,
                                           control_means,
                                           axis=0,
                                           equal_var=False)
                    # significant and not masked in the control field
                    significance = ((pval <= pvalue_max) &
                                    (~control_mean.mask)).astype(int)
                    print("pval ranges: {} to {}".format(
                        pval.min(), pval.max()))

                    modified_mean = np.mean(modified_means, axis=0)
                    if the_label not in label_to_season_to_difference:
                        label_to_season_to_difference[the_label] = OrderedDict()
                        label_to_season_to_significance[the_label] = OrderedDict()

                    modified_mean = infovar.get_to_plot(
                        var_name,
                        modified_mean,
                        lake_fraction=domain_props.lake_fraction,
                        lons=lons2d,
                        lats=lats2d,
                        level_width_m=lev_width_3d[:, :, level])

                    # multiply by the number of days in a season for PR and TRAF to convert them into mm from mm/day
                    if convert_to_seasonal_total:
                        modified_mean *= get_num_days(months_of_interest)

                    diff_vals = modified_mean - control_mean

                    print("diff ranges: min: {0};  max: {1}".format(
                        diff_vals.min(), diff_vals.max()))
                    label_to_season_to_difference[the_label][season] = diff_vals
                    label_to_season_to_significance[the_label][season] = significance

                    print("Calculated mean and diff from {0}".format(the_path))
        except NoSuchNodeError:
            print("Could not find {0}, skipping...".format(var_name))
            continue

        for the_label, data in label_to_season_to_difference.items():
            axes = [fig.add_subplot(gs[the_row, col]) for col in range(ncols)]

            # Set season titles
            if the_row == 0:
                for the_season, ax in zip(season_list, axes):
                    ax.set_title(the_season)

            _plot_row(axes,
                      data,
                      the_label,
                      var_name,
                      increments=True,
                      domain_props=domain_props,
                      season_list=season_list,
                      significance=label_to_season_to_significance[the_label])

            var_label = infovar.get_long_display_label_for_var(var_name)
            if var_name == "I1":
                var_label = "{}\n{} layer".format(var_label,
                                                  ordinal(level + 1))

            axes[0].set_ylabel(var_label)

    fig.suptitle("({}) vs ({})".format(labels[0], control_label),
                 font_properties=FontProperties(weight="bold"))
    folderpath = os.path.join(
        images_folder, "seasonal_mean_maps/{0}_vs_{1}_for_{2}_{3}-{4}".format(
            "_".join(labels), control_label,
            "-".join(list(season_to_months.keys())), start_year, end_year))

    # makedirs (not mkdir): the intermediate "seasonal_mean_maps" folder might
    # not exist yet
    os.makedirs(folderpath, exist_ok=True)

    imname = "{0}_{1}.png".format("-".join(varnames),
                                  "_".join(labels + [control_label]))
    impath = os.path.join(folderpath, imname)
    fig.savefig(impath, bbox_inches="tight")
    # release the figure, as the other plotting functions of this file do
    plt.close(fig)
Ejemplo n.º 13
0
def main():
    """
    Plot lake-atmosphere temperature differences for current and future climate.

    The current-climate simulation is hard-coded below; the future
    configuration is obtained by shifting the current one by
    ``future_shift_years``. For each season of ``season_to_months`` a figure
    with three maps is saved to ``img_folder``:
      * lake temperature (L1, shifted by -273.15) minus air temperature (TT)
        for the current period,
      * the same for the future period (same colour levels),
      * the change between the two.
    Only points with lake fraction >= ``lkfr_limit`` are shown.

    Several additional diagnostic figures (lhc, avc, avf, d(av), d(lh), TRAF)
    are created with pyplot on the way; they are neither saved nor closed here.
    """
    lkfr_limit = 0.05
    model_data_current_path = "/skynet3_rech1/huziy/hdf_store/cc-canesm2-driven/" \
                              "quebec_0.1_crcm5-hcd-rl-cc-canesm2-1980-2010.hdf5"

    modif_label = "CanESM2-CRCM5-L"

    start_year_c = 1980
    end_year_c = 2010

    future_shift_years = 90

    model_config_c = RunConfig(start_year=start_year_c, end_year=end_year_c,
                               data_path=model_data_current_path, label=modif_label)
    model_config_f = model_config_c.get_shifted_config(future_shift_years)

    bmp_info = analysis.get_basemap_info(r_config=model_config_c)

    specific_cond_heat = 0.250100e7  # J/kg
    water_density = 1000.0  # kg/m**3

    season_to_months = OrderedDict([
        ("Summer", [6, 7, 8]),
    ])

    def _climatology(run_config, varname, level):
        # Seasonal climatology {season: 2D field} of varname for the given run
        return analysis.get_seasonal_climatology_for_runconfig(
            run_config=run_config, varname=varname, level=level,
            season_to_months=season_to_months)

    lkfr = analysis.get_array_from_file(
        path="/RESCUE/skynet3_rech1/huziy/hdf_store/quebec_0.1_crcm5-hcd-rl.hdf5",
        var_name=infovar.HDF_LAKE_FRACTION_NAME)

    assert lkfr is not None, "Could not find lake fraction in the file"

    # Current climate
    traf_c = _climatology(model_config_c, "TRAF", 5)
    pr_c = _climatology(model_config_c, "PR", 0)

    lktemp_c = _climatology(model_config_c, "L1", 0)
    airtemp_c = _climatology(model_config_c, "TT", 0)

    lhc = OrderedDict([
        (s, specific_cond_heat * (pr_c[s] * water_density - traf_c[s])) for s in traf_c
    ])

    avc = _climatology(model_config_c, "AV", 0)

    plt.figure()
    lhc["Summer"] = np.ma.masked_where(lkfr < lkfr_limit, lhc["Summer"])
    print("min: {}, max: {}".format(lhc["Summer"].min(), lhc["Summer"].max()))
    cs = plt.contourf(lhc["Summer"].T)
    plt.title("lhc")
    plt.colorbar()

    # same levels and colours as for lhc, to make the two maps comparable
    plt.figure()
    cs = plt.contourf(avc["Summer"].T, levels=cs.levels, norm=cs.norm, cmap=cs.cmap)
    plt.title("avc")
    plt.colorbar()

    # Future climate
    traf_f = _climatology(model_config_f, "TRAF", 5)
    pr_f = _climatology(model_config_f, "PR", 0)

    lktemp_f = _climatology(model_config_f, "L1", 0)
    airtemp_f = _climatology(model_config_f, "TT", 0)

    lhf = OrderedDict([
        (s, specific_cond_heat * (pr_f[s] * water_density - traf_f[s])) for s in traf_f
    ])

    plt.figure()
    plt.pcolormesh(traf_c["Summer"].T)
    plt.title("TRAF over lakes current")
    plt.colorbar()

    avf = _climatology(model_config_f, "AV", 0)

    plt.figure()
    cs = plt.contourf(avf["Summer"].T)
    plt.title("avf")
    plt.colorbar()

    plt.figure()
    cs = plt.contourf(avf["Summer"].T - avc["Summer"].T, levels=np.arange(-40, 45, 5))
    plt.title("d(av)")
    plt.colorbar()

    # same levels and colours as for d(av)
    plt.figure()
    plt.contourf(lhf["Summer"].T - lhc["Summer"].T, levels=cs.levels, cmap=cs.cmap, norm=cs.norm)
    plt.title("d(lh)")
    plt.colorbar()

    # plotting
    plot_utils.apply_plot_params(width_cm=15, height_cm=15, font_size=10)
    gs = GridSpec(2, 2)

    for season in season_to_months:
        fig = plt.figure()

        # NOTE: the climatology dictionaries are modified in place here
        lktemp_c[season] -= 273.15
        dT_c = np.ma.masked_where(lkfr < lkfr_limit, lktemp_c[season] - airtemp_c[season])

        lktemp_f[season] -= 273.15
        dT_f = np.ma.masked_where(lkfr < lkfr_limit, lktemp_f[season] - airtemp_f[season])

        # Fixed colour levels shared by the current and future panels
        clevs = np.arange(-12, 13, 1)
        ncolors = len(clevs) - 1
        bn = BoundaryNorm(clevs, ncolors=ncolors)
        cmap = cm.get_cmap("seismic", ncolors)

        ax_list = []

        fig.suptitle(season)

        xx, yy = bmp_info.get_proj_xy()

        # Current gradient
        ax = fig.add_subplot(gs[0, 0])
        ax.set_title(r"current: $T_{\rm lake} - T_{\rm atm}$")
        cs = bmp_info.basemap.pcolormesh(xx, yy, dT_c, ax=ax, norm=bn, cmap=cmap)
        bmp_info.basemap.colorbar(cs, ax=ax, extend="both")
        ax_list.append(ax)

        # Future Gradient
        # Only the norm is passed: matplotlib does not support norm together
        # with vmin/vmax, and the colour limits are defined by the norm anyway.
        ax = fig.add_subplot(gs[0, 1])
        ax.set_title(r"future: $T_{\rm lake} - T_{\rm atm}$")
        cs = bmp_info.basemap.pcolormesh(xx, yy, dT_f, ax=ax, norm=bn, cmap=cmap)
        bmp_info.basemap.colorbar(cs, ax=ax, extend="both")
        ax_list.append(ax)

        # Change in the gradient
        ax = fig.add_subplot(gs[1, 0])
        ax.set_title(r"$\Delta T_{\rm future} - \Delta T_{\rm current}$")

        ddT = dT_f - dT_c
        clevs = np.arange(-3, 3.1, 0.1)
        ncolors = len(clevs) - 1
        bn = BoundaryNorm(clevs, ncolors=ncolors)
        cmap = cm.get_cmap("seismic", ncolors)
        cs = bmp_info.basemap.pcolormesh(xx, yy, ddT, ax=ax, norm=bn, cmap=cmap)
        bmp_info.basemap.colorbar(cs, ax=ax, extend="both")
        ax_list.append(ax)

        # Change in the latent heat flux (disabled, the gs[1, 1] panel stays empty)
        # ax = fig.add_subplot(gs[1, 1])
        # ax.set_title(r"$LE_{\rm future} - LE_{\rm current}$")
        # dlh = np.ma.masked_where(lkfr < lkfr_limit, lhf[season] - lhc[season])
        #
        # clevs = np.arange(0, 105, 5)
        # bn = BoundaryNorm(clevs, ncolors=ncolors)
        # cmap = cm.get_cmap("jet", ncolors)
        #
        # cs = bmp_info.basemap.pcolormesh(xx, yy, dlh, norm=bn, cmap=cmap)
        # bmp_info.basemap.colorbar(cs, ax=ax, extend="max")
        # ax_list.append(ax)

        for the_ax in ax_list:
            bmp_info.basemap.drawcoastlines(linewidth=0.3, ax=the_ax)

        fig.tight_layout()
        # NOTE(review): the file name does not contain the season, so with more
        # than one season in season_to_months the images overwrite each other.
        img_name = "lake_atm_gradients_and_fluxes_{}-{}_{}-{}.png".format(
            model_config_f.start_year, model_config_f.end_year, start_year_c, end_year_c)
        fig.savefig(os.path.join(img_folder, img_name),
                    dpi=800,
                    bbox_inches="tight")
def _first_matching_field(table, query):
    """Return the "field" value of the first row of table matching query, or None if there is none."""
    for row in table.where(query):
        return row["field"]
    return None


def get_mean_diffs(interflow_data_path="",
                   base_data_path="",
                   start_year=1980,
                   end_year=2010,
                   months_of_interest=(4, 5, 6, 7, 8, 9),
                   delete_cache=True):
    """
    Accumulate the differences (interflow - base) of fixed variables between
    the interflow_data_path and base_data_path files.

    For each precipitation record of the interflow file in the selected
    months and years, the matching TT, TRAF, TDRA and I1 fields (level index 0
    for the last three) are looked up in both files. The differences are added
    up only at the points where, in both simulations, precipitation is above
    ``precip_limit`` and temperature is above ``tt_limit``, and where the two
    precipitation fields differ by less than 1% of their average.

    NOTE: despite the name of the function the returned fields are sums over
    the time steps, they are not divided by the number of contributing steps.

    The result is cached in a pickle file in the current working directory.
    The name of the cache file depends only on the period and months, not on
    the input paths. Pickle files must only be loaded from trusted locations.

    :param interflow_data_path: path to the hdf file of the simulation with interflow
    :param base_data_path: path to the hdf file of the simulation without interflow
    :param start_year: first year of the period (inclusive)
    :param end_year: last year of the period (inclusive)
    :param months_of_interest: months used for the selection
    :param delete_cache: if True (default) an existing cache file is removed,
     i.e. the fields are always recalculated
    :return: (traf_diff, prcip_diff, drainage_diff, i1_diff)
    """
    # Build the name of the cache file
    cache_file = "cache_extr_intf_effect{}-{}_{}.bin".format(
        start_year, end_year, "-".join(str(m) for m in months_of_interest))

    # Do not use caching by default. The cache file might not exist yet
    # (e.g. on the first call), which is not an error.
    if delete_cache and os.path.isfile(cache_file):
        os.remove(cache_file)

    if os.path.isfile(cache_file):
        # pickle needs the file to be opened in the binary mode
        with open(cache_file, "rb") as cache:
            return tuple(pickle.load(cache))

    precip_limit = 0.0  # at least it should rain
    tt_limit = 0  # and the soil should not be frozen

    traf_diff = None  # surface runoff difference
    prcip_diff = None
    drainage_diff = None  # drainage difference
    i1_diff = None  # soil moisture difference
    months_query = "{}".format("|".join(
        ["(month=={})".format(m) for m in months_of_interest]))
    year_query = "(year >= {}) & (year <= {})".format(start_year, end_year)
    print("months_query = {}".format(months_query))

    with tb.open_file(interflow_data_path) as h_intf:
        pr_intf_table = h_intf.get_node("/", "PR")
        tt_intf_table = h_intf.get_node("/", "TT")
        traf_intf_table = h_intf.get_node("/", "TRAF")
        tdra_intf_table = h_intf.get_node("/", "TDRA")
        i1_intf_table = h_intf.get_node("/", "I1")

        assert isinstance(pr_intf_table, tb.Table)
        assert isinstance(tt_intf_table, tb.Table)
        assert isinstance(traf_intf_table, tb.Table)
        assert isinstance(tdra_intf_table, tb.Table)

        print(len(pr_intf_table), len(tt_intf_table), len(traf_intf_table))

        with tb.open_file(base_data_path) as h_nointf:

            pr_nointf_table = h_nointf.get_node("/", "PR")
            tt_nointf_table = h_nointf.get_node("/", "TT")
            traf_nointf_table = h_nointf.get_node("/", "TRAF")
            tdra_nointf_table = h_nointf.get_node("/", "TDRA")
            i1_nointf_table = h_nointf.get_node("/", "I1")

            assert isinstance(pr_nointf_table, tb.Table)
            assert isinstance(tt_nointf_table, tb.Table)
            assert isinstance(traf_nointf_table, tb.Table)
            assert isinstance(tdra_nointf_table, tb.Table)

            selection = "({}) & {}".format(months_query, year_query)
            for rownum, pr_intf_row in enumerate(pr_intf_table.where(selection)):
                year, month, day, hour = [
                    pr_intf_row[k] for k in ["year", "month", "day", "hour"]
                ]

                pr_intf_field = pr_intf_row["field"]

                # Get the fields of the other variables for the same time
                tt_query = "(year == {}) & (month == {}) & (day == {}) & (hour == {})".format(
                    year, month, day, hour)
                traf_query = "{} & (level_index == {})".format(tt_query, 0)

                # simulation with interflow
                tt_intf_field = _first_matching_field(tt_intf_table, tt_query)
                traf_intf_field = _first_matching_field(traf_intf_table, traf_query)
                tdra_intf_field = _first_matching_field(tdra_intf_table, traf_query)
                i1_intf_field = _first_matching_field(i1_intf_table, traf_query)

                # simulation without interflow
                tt_nointf_field = _first_matching_field(tt_nointf_table, tt_query)
                pr_nointf_field = _first_matching_field(pr_nointf_table, tt_query)
                traf_nointf_field = _first_matching_field(traf_nointf_table, traf_query)
                tdra_nointf_field = _first_matching_field(tdra_nointf_table, traf_query)
                i1_nointf_field = _first_matching_field(i1_nointf_table, traf_query)

                if traf_diff is None:
                    traf_diff = np.zeros(pr_intf_field.shape)
                    prcip_diff = np.zeros(pr_intf_field.shape)
                    drainage_diff = np.zeros(pr_intf_field.shape)
                    i1_diff = np.zeros(pr_intf_field.shape)

                # It rains and it is not freezing in both simulations, and the
                # precipitation is almost the same (within 1% of the average)
                points_of_interest = (
                    (pr_intf_field > precip_limit) &
                    (pr_nointf_field > precip_limit) &
                    (tt_intf_field > tt_limit) & (tt_nointf_field > tt_limit)
                    & (abs(pr_intf_field - pr_nointf_field) < 0.01 *
                       (pr_intf_field + pr_nointf_field) / 2.0))

                if rownum % 100 == 0:
                    print("Precipitation ranges in M/s")
                    print(pr_intf_field.min(), pr_intf_field.max())
                    print(pr_nointf_field.min(), pr_nointf_field.max())

                # Report the query for which no runoff record was found
                # (the accumulation below fails in this case)
                if traf_intf_field is None:
                    print("intf field is none")
                    print(traf_query)

                if traf_nointf_field is None:
                    print("nointf field is none")
                    print(traf_query)

                traf_diff[points_of_interest] += traf_intf_field[points_of_interest] - \
                                                 traf_nointf_field[points_of_interest]

                prcip_diff[points_of_interest] += pr_intf_field[points_of_interest] - \
                                                  pr_nointf_field[points_of_interest]

                drainage_diff[points_of_interest] += tdra_intf_field[points_of_interest] - \
                                                     tdra_nointf_field[points_of_interest]

                i1_diff[points_of_interest] += i1_intf_field[points_of_interest] - \
                                               i1_nointf_field[points_of_interest]

    with open(cache_file, "wb") as cache:
        pickle.dump([traf_diff, prcip_diff, drainage_diff, i1_diff], cache)
    return traf_diff, prcip_diff, drainage_diff, i1_diff
def main(hdf_folder="/home/huziy/skynet3_rech1/hdf_store", start_year=1980, end_year=2010):
    """
    Scatter-plot modelled against observed streamflow return levels at selected CEHQ stations.

    One panel is drawn for each extreme type and return period listed in ExtremeProperties
    (log-log axes, observations on x, model on y), with one point per station and simulation.
    The mean relative bias and the Pearson correlation of each panel are printed, and the figure
    is written as an eps file into ``img_folder`` (a name this function does not define itself,
    presumably a module level path object).

    :param hdf_folder: folder that contains the hdf5 files of the simulations
    :param start_year: first year of the period used for the station data and the model extremes
    :param end_year: last year (inclusive) of that period
    """
    prepare()

    all_markers = ["*", "s", "p", "+", "x", "d", "h"]

    # Colors that are hard to see on a white background are not used
    excluded = ["white", "w", "aliceblue", "azure"]
    excluded.extend(ci for ci in colors.cnames if "yellow" in ci)

    all_colors = ["k", "b", "r", "g", "m"] + sorted(ci for ci in colors.cnames if ci not in excluded)

    # Station ids to get from the CEHQ database
    ids_with_lakes_upstream = [
        "104001", "093806", "093801", "081002", "081007", "080718"
    ]

    selected_ids = ids_with_lakes_upstream

    filedir = Path(hdf_folder)
    sim_name_to_file_path = OrderedDict([
        # ("CRCM5-LI", filedir.joinpath("quebec_0.1_crcm5-hcd-r.hdf5").as_posix()),

        ("ERAI-CRCM5-L", filedir.joinpath("quebec_0.1_crcm5-hcd-rl.hdf5").as_posix()),

        # ("CanESM2-CRCM5-NL", filedir.joinpath("cc-canesm2-driven/quebec_0.1_crcm5-r-cc-canesm2-1980-2010.hdf5").as_posix()),

        ("CanESM2-CRCM5-L",
         filedir.joinpath("cc-canesm2-driven/quebec_0.1_crcm5-hcd-rl-cc-canesm2-1980-2010.hdf5").as_posix()),

        # ("CanESM2-CRCM5-LI", filedir.joinpath("cc-canesm2-driven/quebec_0.1_crcm5-hcd-rl-intfl-cc-canesm2-1980-2010.hdf5").as_posix()),
    ])

    obs_label = "Obs."
    labels = [obs_label, ] + list(sim_name_to_file_path.keys())

    # zip stops at the shortest sequence, so at most len(all_markers) labels get a marker
    label_to_marker = dict(zip(labels, all_markers))
    label_to_color = dict(zip(labels, all_colors))

    # Get the list of stations to do the comparison with
    start_date = datetime(start_year, 1, 1)
    end_date = datetime(end_year, 12, 31)
    stations = cehq_station.read_station_data(
        start_date=start_date, end_date=end_date, selected_ids=selected_ids
    )

    # Get geophysical fields from one of the model simulations (the first one in the dict)
    path0 = list(sim_name_to_file_path.values())[0]
    lons2d, lats2d, basemap = analysis.get_basemap_from_hdf(file_path=path0)
    flow_directions = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_FLOW_DIRECTIONS_NAME)
    lake_fraction = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_LAKE_FRACTION_NAME)

    accumulation_area_km2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_ACCUMULATION_AREA_NAME)
    area_m2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_CELL_AREA_NAME_M2)

    # Try to read cell areas in m**2, if they are not available then try in km**2
    # NOTE(review): cell_area_km2 is not used in the visible part of this function
    if area_m2 is not None:
        cell_area_km2 = area_m2 * 1.0e-6
    else:
        cell_area_km2 = analysis.get_array_from_file(path=path0, var_name=infovar.HDF_CELL_AREA_NAME_KM2)

    cell_manager = CellManager(flow_directions, accumulation_area_km2=accumulation_area_km2,
                               lons2d=lons2d, lats2d=lats2d)

    # Get the list of the corresponding model points
    station_to_modelpoint = cell_manager.get_model_points_for_stations(
        station_list=stations,
        lake_fraction=lake_fraction,
        drainaige_area_reldiff_limit=0.1)

    # plot_utils.apply_plot_params(font_size=10, width_cm=20, height_cm=18)
    fig = plt.figure()

    # One row of panels per extreme type, one column per return period
    ncols = max(len(rp_list) for rp_list in ExtremeProperties.extreme_type_to_return_periods.values())
    nrows = len(ExtremeProperties.extreme_types)
    gs = GridSpec(nrows, ncols)

    ext_type_to_rp_to_ax = OrderedDict()
    ax_with_legend = None
    last_ax = None

    # label -> axes -> list of observed (x) / modelled (y) return levels, one value per station
    label_to_ax_to_xdata = {}
    label_to_ax_to_ydata = {}
    for row, ext_type in enumerate(ExtremeProperties.extreme_types):
        ext_type_to_rp_to_ax[ext_type] = OrderedDict()
        for col, rperiod in enumerate(ExtremeProperties.extreme_type_to_return_periods[ext_type]):
            ax = fig.add_subplot(gs[row, col])
            ext_type_to_rp_to_ax[ext_type][rperiod] = ax
            last_ax = ax

            if row == nrows - 1 and col == ncols - 1:
                ax_with_legend = ax

            # Set axes labels
            if row == nrows - 1:
                ax.set_xlabel("Observations")

            # The extreme type used to be set as the y label here as well, but it was
            # always overwritten by "Model", so only the effective label is set.
            if col == 0:
                ax.set_ylabel("Model")

            for label in sim_name_to_file_path:
                label_to_ax_to_xdata.setdefault(label, {})[ax] = []
                label_to_ax_to_ydata.setdefault(label, {})[ax] = []

            ax.set_xscale("log")
            ax.set_yscale("log")

    # The bottom row can have fewer panels than ncols, in that case attach the legend to
    # the last panel that was created
    if ax_with_legend is None:
        ax_with_legend = last_ax

    print("Initial list of stations:")

    # (simulation label, extreme type) -> field of annual extremes. The arguments used to
    # compute a field do not depend on the station, so each field is computed only once
    # (on first use) instead of once per station.
    sim_and_ext_type_to_field = {}

    sim_label_to_handle = {}
    for s in stations:
        print("{0}".format(s))
        assert isinstance(s, Station)

        print(len([y for y in s.get_list_of_complete_years() if start_year <= y <= end_year]))
        df_ext_obs = extreme_commons.get_annual_extrema(ts_times=s.dates, ts_vals=s.values,
                                                        start_year=start_year, end_year=end_year)
        mp = station_to_modelpoint[s]

        assert isinstance(mp, ModelPoint)

        years_of_interest = df_ext_obs.index

        # label -> ext_type -> (return period -> ret level, return period -> std)
        label_to_return_levels = OrderedDict(
            [(obs_label, OrderedDict())]
        )
        for sim_label in sim_name_to_file_path:
            label_to_return_levels[sim_label] = OrderedDict()

        # Calculate the return levels and standard deviations
        for ext_type in ExtremeProperties.extreme_types:

            return_periods = ExtremeProperties.extreme_type_to_return_periods[ext_type]

            # fit GEV distribution and apply non-parametric bootstrap to get std
            label_to_return_levels[obs_label][ext_type] = gevfit.do_gevfit_for_a_point(df_ext_obs[ext_type].values,
                                                                                       extreme_type=ext_type,
                                                                                       return_periods=return_periods)
            return_levels_obs, rl_stds_obs = label_to_return_levels[obs_label][ext_type]

            # get annual extremes of the model output at the points close to the stations
            for sim_label, sim_path in sim_name_to_file_path.items():

                field_key = (sim_label, ext_type)
                if field_key not in sim_and_ext_type_to_field:
                    sim_and_ext_type_to_field[field_key] = analysis.get_annual_extrema(
                        rconfig=RunConfig(data_path=sim_path, start_year=start_year, end_year=end_year),
                        varname="STFL",
                        months_of_interest=ExtremeProperties.extreme_type_to_month_of_interest[ext_type],
                        n_avg_days=ExtremeProperties.extreme_type_to_n_agv_days[ext_type],
                        high_flow=ext_type == ExtremeProperties.high)
                ext_field = sim_and_ext_type_to_field[field_key]

                # Select only those years when obs are available
                ts_data = np.array([v for y, v in zip(range(start_year, end_year + 1), ext_field[:, mp.ix, mp.jy])
                                    if y in years_of_interest])

                # Store the model fit the same way as it is done for the observations
                label_to_return_levels[sim_label][ext_type] = gevfit.do_gevfit_for_a_point(
                    ts_data, extreme_type=ext_type, return_periods=return_periods)
                return_levels, rl_stds = label_to_return_levels[sim_label][ext_type]

                # Do the plotting
                for rp in return_periods:
                    ax = ext_type_to_rp_to_ax[ext_type][rp]
                    ax.set_title("T = {rp}-year".format(rp=rp))

                    # h = ax.errorbar(return_levels_obs[rp], return_levels[rp],
                    # marker=label_to_marker[sim_label], color=label_to_color[sim_label], label=sim_label,
                    #                 xerr=rl_stds_obs[rp] * 1.96, yerr=rl_stds[rp] * 1.96)

                    h = ax.scatter(return_levels_obs[rp], return_levels[rp],
                                   marker=label_to_marker[sim_label], color=label_to_color[sim_label],
                                   label=sim_label)

                    # save the data for further calculation of the biases and correlation coefficients
                    label_to_ax_to_xdata[sim_label][ax].append(return_levels_obs[rp])
                    label_to_ax_to_ydata[sim_label][ax].append(return_levels[rp])

                    sim_label_to_handle[sim_label] = h

    # Calculate the biases and correlations over the stations
    for sim_label in sim_name_to_file_path:
        for ext_type in ExtremeProperties.extreme_types:
            for rp in ExtremeProperties.extreme_type_to_return_periods[ext_type]:

                ax = ext_type_to_rp_to_ax[ext_type][rp]
                mod = np.asarray(label_to_ax_to_ydata[sim_label][ax])
                obs = np.asarray(label_to_ax_to_xdata[sim_label][ax])

                # The correlation is not defined for less than 2 points
                if mod.size < 2:
                    print("({sim_label}) Not enough stations ({n}) to calculate statistics for {rp}-year {ext_type}-flow return level".format(
                        sim_label=sim_label, n=mod.size, rp=rp, ext_type=ext_type
                    ))
                    continue

                bias = np.mean((mod - obs) / obs)
                corr, pv = stats.pearsonr(mod, obs)
                print("({sim_label}) Mean bias for {rp}-year {ext_type}-flow return level is: {bias}; corr={corr:.2f}; corr_pval={corr_pval:.2g}".format(
                    sim_label=sim_label, rp=rp, bias=bias, corr=corr, corr_pval=pv,
                    ext_type=ext_type
                ))

    sfmt = ScalarFormatter(useMathText=True)
    sfmt.set_powerlimits((-2, 2))
    for et, rp_to_ax in ext_type_to_rp_to_ax.items():
        for rp, ax in rp_to_ax.items():
            # Draw the 1:1 line over the whole range covered by both axes
            xmin, xmax = ax.get_xlim()
            ymin, ymax = ax.get_ylim()
            x1 = min(xmin, ymin)
            x2 = max(xmax, ymax)
            ax.plot([x1, x2], [x1, x2], "k--")
            # ax.xaxis.set_major_locator(MaxNLocator(nbins=5))
            # ax.yaxis.set_major_locator(MaxNLocator(nbins=5))
            # ax.xaxis.set_major_formatter(sfmt)
            # ax.yaxis.set_major_formatter(sfmt)

    # Only the simulations for which something was plotted have a handle
    sim_labels = [sl for sl in sim_name_to_file_path if sl in sim_label_to_handle]
    if ax_with_legend is not None and sim_labels:
        ax_with_legend.legend([sim_label_to_handle[sl] for sl in sim_labels], sim_labels,
                              bbox_to_anchor=(1, -0.25), borderaxespad=0.0, loc="upper right",
                              ncol=2, scatterpoints=1, numpoints=1)

    # Save the plot
    img_format = "eps"
    img_file = "{}.{}".format("_".join(sorted(label_to_marker.keys())), img_format)
    img_file = img_folder.joinpath(img_file)

    fig.tight_layout()
    with img_file.open("wb") as f:
        # The format cannot be inferred from an open file object, so it is passed explicitly,
        # otherwise the default format from the rc parameters would be written to the file
        fig.savefig(f, format=img_format, bbox_inches="tight")