Esempio n. 1
0
    def test_comp_result(self):
        """Compare NSE ECDFs of the plain and the ``majordam`` results per split.

        For every split index the two cached data models are loaded, the
        statistics of their saved predictions are computed and the two NSE
        ECDF curves are drawn together with ``plot_ecdfs``.
        """

        def _metrics_frame(model, **result_names):
            # Load the saved prediction/observation arrays that belong to
            # ``model``, keep their first two dimensions and return the
            # statistics as a DataFrame tagged with the site ids.
            predicted, observed = load_result(
                model.data_source.data_config.data_path['Temp'],
                self.test_epoch, **result_names)
            predicted = predicted.reshape(predicted.shape[0], predicted.shape[1])
            observed = observed.reshape(observed.shape[0], observed.shape[1])
            stats = statError(observed, predicted)
            stats['STAID'] = model.t_s_dict["sites_id"]
            return pd.DataFrame(stats)

        for split_idx in range(self.split_num):
            split_name = str(split_idx)
            plain_model = GagesModel.load_datamodel(
                self.config_data.data_path["Temp"], split_name,
                data_source_file_name='test_data_source.txt',
                stat_file_name='test_Statistics.json',
                flow_file_name='test_flow.npy',
                forcing_file_name='test_forcing.npy',
                attr_file_name='test_attr.npy',
                f_dict_file_name='test_dictFactorize.json',
                var_dict_file_name='test_dictAttribute.json',
                t_s_dict_file_name='test_dictTimeSpace.json')
            majordam_model = GagesModel.load_datamodel(
                self.config_data.data_path["Temp"], split_name,
                data_source_file_name='test_data_source_majordam.txt',
                stat_file_name='test_Statistics_majordam.json',
                flow_file_name='test_flow_majordam.npy',
                forcing_file_name='test_forcing_majordam.npy',
                attr_file_name='test_attr_majordam.npy',
                f_dict_file_name='test_dictFactorize_majordam.json',
                var_dict_file_name='test_dictAttribute_majordam.json',
                t_s_dict_file_name='test_dictTimeSpace_majordam.json')

            frames = [
                _metrics_frame(plain_model),
                _metrics_frame(majordam_model,
                               pred_name='flow_pred_majordam',
                               obs_name='flow_obs_majordam'),
            ]

            metric = "NSE"
            legends = ["PUB_test_in_no-major-dam_basins", "PUB_test_in_major-dam_basins"]
            curve_xs = []
            curve_ys = []
            for frame in frames:
                curve_x, curve_y = ecdf(frame[metric])
                curve_xs.append(curve_x)
                curve_ys.append(curve_y)

            plot_ecdfs(curve_xs, curve_ys, legends)
Esempio n. 2
0
 def test_plot_ecdf_together(self):
     """Overlay the ECDFs of the first metric in ``self.keys`` for two experiments."""
     # (experiment name, legend text) for every curve, in drawing order
     experiments = [
         ("basic_exp37", "random_1234"),
         ("basic_exp39", "random_123"),
     ]
     epoch = 300
     metric = self.keys[0]

     curve_xs, curve_ys = [], []
     for experiment, _legend in experiments:
         exp_config = load_dataconfig_case_exp(cfg, experiment)
         predicted, observed = load_result(exp_config.data_path['Temp'], epoch)
         # keep only the first two dimensions of the stored arrays
         predicted = predicted.reshape(predicted.shape[0], predicted.shape[1])
         observed = observed.reshape(observed.shape[0], observed.shape[1])
         stats = statError(observed, predicted)
         curve_x, curve_y = ecdf(stats[metric])
         curve_xs.append(curve_x)
         curve_ys.append(curve_y)

     plot_ecdfs(curve_xs,
                curve_ys, [legend for _experiment, legend in experiments],
                x_str="NSE",
                y_str="CDF")
     plt.show()
Esempio n. 3
0
 def test_plot_ecdf_matplotlib(self):
     """Draw the ECDFs of five experiments with ``plot_ecdfs_matplot``.

     The curve of the last experiment is grey and dashed; the others use
     solid coloured lines.
     """
     # (experiment name, legend text, colour, dashed?) in drawing order
     curve_specs = [
         ("basic_exp37", "random_1234", "#1f77b4", False),
         ("basic_exp39", "random_123", "#ff7f0e", False),
         ("basic_exp40", "random_12345", "#2ca02c", False),
         ("basic_exp41", "random_111", "#d62728", False),
         ("basic_exp42", "random_1111", "grey", True),
     ]
     epoch = 300
     metric = self.keys[0]

     curve_xs, curve_ys = [], []
     for spec in curve_specs:
         exp_config = load_dataconfig_case_exp(cfg, spec[0])
         predicted, observed = load_result(exp_config.data_path['Temp'], epoch)
         # keep only the first two dimensions of the stored arrays
         predicted = predicted.reshape(predicted.shape[0], predicted.shape[1])
         observed = observed.reshape(observed.shape[0], observed.shape[1])
         stats = statError(observed, predicted)
         curve_x, curve_y = ecdf(stats[metric])
         curve_xs.append(curve_x)
         curve_ys.append(curve_y)

     plot_ecdfs_matplot(curve_xs,
                        curve_ys, [spec[1] for spec in curve_specs],
                        colors=[spec[2] for spec in curve_specs],
                        dash_lines=[spec[3] for spec in curve_specs],
                        x_str="NSE",
                        y_str="CDF")
     plt.show()
Esempio n. 4
0
def plot_ecdf(mydataframe, mycolumn, save_file=None):
    """Plot the empirical cumulative distribution function of one column.

    Args:
        mydataframe: table holding the data; it is indexed with ``mycolumn``.
        mycolumn: label of the column whose ECDF is drawn.
        save_file: optional path; when given, the figure is written there
            before it is displayed.
    """
    x, y = ecdf(mydataframe[mycolumn])
    df = pd.DataFrame({"x": x, "y": y})
    sns.set_style("ticks", {'axes.grid': True})
    sns.lineplot(x="x", y="y", data=df,
                 estimator=None).set(xlim=(0, 1),
                                     xticks=np.arange(0, 1, 0.05),
                                     yticks=np.arange(0, 1, 0.05))
    # Save before showing: with blocking backends the figure is released once
    # the window opened by plt.show() is closed, so a later savefig() would
    # write an empty canvas.
    if save_file is not None:
        plt.savefig(save_file)
    plt.show()
Esempio n. 5
0
 def test_plot_ecdf_together(self):
     """Overlay the NSE ECDFs of the two experiments listed below."""
     # (experiment name, legend text) for every curve, in drawing order
     experiments = [
         ("basic_exp38", "without_warmup"),
         ("warmup_exp1", "with_warmup"),
     ]

     curve_xs, curve_ys = [], []
     for experiment, _legend in experiments:
         exp_config = load_dataconfig_case_exp(experiment)
         predicted, observed = load_result(exp_config.data_path['Temp'],
                                           self.test_epoch)
         # keep only the first two dimensions of the stored arrays
         predicted = predicted.reshape(predicted.shape[0], predicted.shape[1])
         observed = observed.reshape(observed.shape[0], observed.shape[1])
         stats = statError(observed, predicted)
         curve_x, curve_y = ecdf(stats["NSE"])
         curve_xs.append(curve_x)
         curve_ys.append(curve_y)

     plot_ecdfs(curve_xs, curve_ys,
                [legend for _experiment, legend in experiments])
Esempio n. 6
0
# Script fragment: collects ECDF curves of several metrics for three
# train/test settings and then prepares a 1x3 figure of NSE curves.
# Six-entry axis settings; presumably one entry per key in keys_ecdf, which is
# defined outside this fragment -- TODO confirm. They are not used in the
# visible part of the script.
x_intervals = [50, 0.1, 0.1, 0.1, 50, 50]
x_lims = [(-200, 200), (0, 1), (0, 1), (0, 1), (-100, 300), (-100, 300)]
show_legends = [True, False, False, False, False, False]
idx_tmp = 0
# cdf_values[metric][legend] -> [x, y] of the corresponding ECDF curve
cdf_values = edict()
for key_tmp in keys_ecdf:
    cdf_values[key_tmp] = edict()
    xs = []
    ys = []
    # cases_exps_legends = ["523sites_from_LSTM-CONUS", "523sites_trained_in_LSTM-CAMELS"]
    cases_exps_legends = [
        "Train: 3557 basins; Test: 523 basins in CAMELS",
        "Train: 523 basins in CAMELS; Test: 523 basins in CAMELS",
        "Train: 3557 basins; Test: 3557 basins"
    ]
    # curve 1: rows of inds_df selected by position through idx_lst_camels
    x1, y1 = ecdf(inds_df[key_tmp].iloc[idx_lst_camels])
    xs.append(x1)
    ys.append(y1)
    cdf_values[key_tmp][cases_exps_legends[0]] = [x1, y1]

    # curve 2: every row of inds_df_camels
    x2, y2 = ecdf(inds_df_camels[key_tmp])
    xs.append(x2)
    ys.append(y2)
    cdf_values[key_tmp][cases_exps_legends[1]] = [x2, y2]

    # curve 3: every row of inds_df
    x_conus, y_conus = ecdf(inds_df[key_tmp])
    xs.append(x_conus)
    ys.append(y_conus)
    cdf_values[key_tmp][cases_exps_legends[2]] = [x_conus, y_conus]

    # plot_ecdfs(xs, ys, cases_exps_legends, x_str="NSE", y_str="CDF")
    # NOTE(review): the statements below use names that are not defined in
    # this fragment (cfg, conus_exps, nodam_exp_lst, inds_df_pair1, ...) and
    # the listing stops right after ax1 is created; they look like they come
    # from a different snippet -- confirm against the original file.
    inds_df_conus = load_ensemble_result(cfg, conus_exps, test_epoch)
    # use a serif font, trying 'Times New Roman' first
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = ['Times New Roman'
                                  ] + plt.rcParams['font.serif']
    sns.set(font="serif", font_scale=1.5)
    fig = plt.figure(figsize=(12, 4))
    gs = gridspec.GridSpec(1, 3)
    keys_nse = "NSE"

    # NSE curves of four selections; the labels are listed in
    # cases_exps_legends_nodam
    xs_nodam = []
    ys_nodam = []
    cases_exps_legends_nodam = [
        "no_dam_alone", "no_dam_in_pair1", "no_dam_in_pair2", "no_dam_in_conus"
    ]
    inds_df_nodam = load_ensemble_result(cfg, nodam_exp_lst, test_epoch)
    x_nodam_solo, y_nodam_solo = ecdf(inds_df_nodam[keys_nse])
    xs_nodam.append(x_nodam_solo)
    ys_nodam.append(y_nodam_solo)
    x_nodam_pair1, y_nodam_pair1 = ecdf(
        inds_df_pair1[keys_nse].iloc[idx_lst_nodam_in_pair1])
    xs_nodam.append(x_nodam_pair1)
    ys_nodam.append(y_nodam_pair1)
    x_nodam_pair2, y_nodam_pair2 = ecdf(
        inds_df_pair2[keys_nse].iloc[idx_lst_nodam_in_pair2])
    xs_nodam.append(x_nodam_pair2)
    ys_nodam.append(y_nodam_pair2)
    x_nodam_conus, y_nodam_conus = ecdf(
        inds_df_conus[keys_nse].iloc[idx_lst_nodam_in_conus])
    xs_nodam.append(x_nodam_conus)
    ys_nodam.append(y_nodam_conus)
    # first of the three panels of the grid
    ax1 = plt.subplot(gs[0])
Esempio n. 8
0
    def test_diff_dor_fig2_in_the_paper(self):
        """Plot NSE ECDFs of basins grouped by diversion and DOR screening.

        Basins are split into four groups (not diverted / diverted, each with
        the ``-self.dor`` and the ``self.dor`` screening) and a fifth curve
        covers all basins of the ensemble result. The figure is saved as
        ``new_dor_divert_comp_matplotlib.png`` in ``data_path["Out"]`` and
        then shown.
        """
        data_model = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"],
            data_source_file_name='data_source.txt',
            stat_file_name='Statistics.json',
            flow_file_name='flow.npy',
            forcing_file_name='forcing.npy',
            attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json',
            var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        config_data = self.config_data
        figure_dpi = self.FIGURE_DPI
        inds_df, _pred_mean, _obs_mean = load_ensemble_result(
            self.config_file,
            self.exp_lst,
            self.test_epoch,
            return_value=True)

        def screened_sites(**criteria):
            # Ids of the gages that pass one screening criterion.
            source = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                **criteria)
            return source.all_configs['flow_screen_gage_id']

        sites_id_diversion = screened_sites(diversion=True)
        sites_id_nodivert = screened_sites(diversion=False)
        sites_id_dor1 = screened_sites(DOR=-self.dor)
        sites_id_dor2 = screened_sites(DOR=self.dor)
        # basins with dams
        sites_id_withdams = screened_sites(dam_num=[1, 100000])
        # restrict the first DOR group to basins that have dams
        sites_id_dor1 = np.intersect1d(np.array(sites_id_dor1),
                                       np.array(sites_id_withdams)).tolist()

        # the order matches the first four entries of the legend list below
        site_groups = [
            np.intersect1d(sites_id_nodivert, sites_id_dor1),
            np.intersect1d(sites_id_nodivert, sites_id_dor2),
            np.intersect1d(sites_id_diversion, sites_id_dor1),
            np.intersect1d(sites_id_diversion, sites_id_dor2),
        ]

        all_sites = data_model.t_s_dict["sites_id"]

        def positions_of(group):
            # Positions in all_sites of the ids contained in group; a set
            # keeps every membership test O(1).
            wanted = set(np.asarray(group).tolist())
            return [
                idx for idx, site in enumerate(all_sites) if site in wanted
            ]

        keys_nse = "NSE"
        cases_exps_legends_together = [
            "not_diverted_small_dor", "not_diverted_large_dor",
            "diversion_small_dor", "diversion_large_dor", "CONUS"
        ]

        xs = []
        ys = []
        for group in site_groups:
            x_group, y_group = ecdf(inds_df[keys_nse].iloc[positions_of(group)])
            xs.append(x_group)
            ys.append(y_group)
        x_conus, y_conus = ecdf(inds_df[keys_nse])
        xs.append(x_conus)
        ys.append(y_conus)
        hydro_logger.info(
            "The median NSEs of all five curves (%s) are \n %.2f, %.2f, %.2f, %.2f, %.2f",
            cases_exps_legends_together, *[np.median(x) for x in xs])

        # two line styles (first pair of groups vs. second pair) and two
        # colours (first vs. second DOR group)
        colors = ["#1f77b4", "#d62728"]
        linestyles = ['-', "--"]

        fig = plt.figure(figsize=(8, 6))
        axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
        for i, linestyle in enumerate(linestyles):
            for j, color in enumerate(colors):
                curve_idx = i * 2 + j
                axes.plot(xs[curve_idx],
                          ys[curve_idx],
                          color=color,
                          ls=linestyle,
                          label=cases_exps_legends_together[curve_idx])
        line_conus, = axes.plot(x_conus,
                                y_conus,
                                color="grey",
                                label=cases_exps_legends_together[4])
        line_conus.set_dashes([2, 2, 10, 2])

        x_str = "NSE"
        y_str = "CDF"
        x_lim = (0, 1)
        y_lim = (0, 1)
        x_interval = 0.1
        y_interval = 0.1
        axes.set_xlabel(x_str, fontsize=18)
        axes.set_ylabel(y_str, fontsize=18)
        axes.set_xlim(x_lim[0], x_lim[1])
        axes.set_ylim(y_lim[0], y_lim[1])
        # set x y number font size; the small offset keeps the upper limit
        # inside the half-open range of np.arange
        plt.xticks(np.arange(x_lim[0], x_lim[1] + x_lim[1] / 100, x_interval),
                   fontsize=16)
        plt.yticks(np.arange(y_lim[0], y_lim[1] + y_lim[1] / 100, y_interval),
                   fontsize=16)
        axes.grid()
        # Hide the right and top spines
        axes.spines['right'].set_visible(False)
        axes.spines['top'].set_visible(False)
        axes.legend(prop={'size': 16})
        plt.savefig(os.path.join(config_data.data_path["Out"],
                                 'new_dor_divert_comp_matplotlib.png'),
                    dpi=figure_dpi,
                    bbox_inches="tight")
        plt.show()
Esempio n. 9
0
    def test_gages_nse_dam_attr(self):
        """Plot how the ensemble NSE relates to several dam attributes.

        Figures produced (all but the last one are saved in
        ``data_path["Out"]``): box plots of NSE per DOR range and per
        dam-number range, a regression plot and a hexbin joint plot of NSE
        against DOR, regression plots of NSE against the log standard
        deviation of the dam storages and against the result of
        ``coefficient_of_variation``, and a joint plot of NSE against the
        result of ``ind_of_dispersion``. The ensemble statistics are also
        stored in ``self.inds_df``.
        """
        figure_dpi = 600
        config_data = self.config_data
        data_dir = config_data.data_path["Temp"]
        data_model = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        gages_id = data_model.t_s_dict["sites_id"]

        def save_figure(file_name):
            # Write the current figure into the output directory.
            plt.savefig(os.path.join(config_data.data_path["Out"], file_name),
                        dpi=figure_dpi,
                        bbox_inches="tight")

        def make_range_labeller(value_ranges, exact_labels):
            # Build a function that maps a value to the label of the
            # (low, high] range containing it. exact_labels lists
            # (value, label) pairs that are checked first; the last range is
            # labelled ">low".
            last_low = value_ranges[-1][0]

            def label_of(value):
                for exact_value, exact_label in exact_labels:
                    if value == exact_value:
                        return exact_label
                matches = [
                    a_range for a_range in value_ranges
                    if a_range[0] < value <= a_range[1]
                ]
                if not matches:
                    # e.g. NaN, a negative value or one above the last bound
                    raise ValueError(
                        "value %r is outside every configured range" %
                        (value, ))
                low, high = matches[0]
                if low == last_low:
                    return ">" + str(low)
                return str(low) + "-" + str(high)

            return label_of

        exp_lst = [
            "basic_exp37", "basic_exp39", "basic_exp40", "basic_exp41",
            "basic_exp42", "basic_exp43"
        ]
        self.inds_df, _pred_mean, _obs_mean = load_ensemble_result(
            config_data.config_file,
            exp_lst,
            config_data.config_file.TEST_EPOCH,
            return_value=True)
        show_ind_key = 'NSE'

        plt.rcParams['font.family'] = 'serif'
        plt.rcParams['font.serif'] = ['Times New Roman'
                                      ] + plt.rcParams['font.serif']
        # plot NSE-DOR
        attr_lst = ["RUNAVE7100", "STOR_NOR_2009"]
        attrs_runavg_stor = data_model.data_source.read_attr(
            gages_id, attr_lst, is_return_dict=False)
        run_avg = attrs_runavg_stor[:, 0] * (10**(-3)) * (10**6
                                                          )  # m^3 per year
        nor_storage = attrs_runavg_stor[:, 1] * 1000  # m^3
        dors = nor_storage / run_avg
        # dor = 0 is not totally same with dam_num=0 (some dammed basins' dor is about 0.00),
        # here for zero-dor we mainly rely on dam_num = 0
        attr_dam_num = ["NDAMS_2009"]
        attrs_dam_num = data_model.data_source.read_attr(gages_id,
                                                         attr_dam_num,
                                                         is_return_dict=False)
        df = pd.DataFrame({
            "DOR": dors,
            "DAM_NUM": attrs_dam_num[:, 0],
            show_ind_key: self.inds_df[show_ind_key].values
        })
        hydro_logger.info("statistics of dors:\n %s", df.describe())
        hydro_logger.info("percentiles of dors:\n %s", df.quantile(q=0.95))
        hydro_logger.info("ecdf of dors:\n %s", ecdf(dors))

        # boxplot
        # add a column to represent the dor range for the df
        dor_value_range_lst = [[0, 0], [0, 0.02], [0.02, 0.05], [0.05, 0.1],
                               [0.1, 0.2], [0.2, 0.4], [0.4, 0.8],
                               [0.8, 10000]]
        dor_range_lst = ["0"] + [
            str(a_range[0]) + "-" + str(a_range[1])
            for a_range in dor_value_range_lst[1:-1]
        ] + [">" + str(dor_value_range_lst[-1][0])]

        # add a column to represent the dam_num range for the df
        dam_num_value_range_lst = [[0, 0], [0, 1], [1, 3], [3, 5], [5, 10],
                                   [10, 20], [20, 50], [50, 10000]]
        dam_num_range_lst = ["0", "1"] + [
            str(a_range[0]) + "-" + str(a_range[1])
            for a_range in dam_num_value_range_lst[2:-1]
        ] + [">" + str(dam_num_value_range_lst[-1][0])]

        in_which_range = make_range_labeller(dor_value_range_lst,
                                             [(0, "0")])
        in_which_dam_num_range = make_range_labeller(dam_num_value_range_lst,
                                                     [(0, "0"), (1, "1")])

        df["DOR_RANGE"] = df["DOR"].apply(in_which_range)
        df["DAM_NUM_RANGE"] = df["DAM_NUM"].apply(in_which_dam_num_range)
        # basins that have dams but a DOR of exactly 0 go into the first
        # non-zero DOR range
        df.loc[(df["DAM_NUM"] > 0) & (df["DOR_RANGE"] == "0"),
               "DOR_RANGE"] = dor_range_lst[1]
        shown_nse_range_boxplots = [-0.5, 1.0]
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        plot_boxs(df,
                  "DOR_RANGE",
                  show_ind_key,
                  ylim=shown_nse_range_boxplots,
                  order=dor_range_lst)
        save_figure('NSE~DOR-boxplots-' + str(shown_nse_range_boxplots) +
                    '.png')
        plt.figure()
        shown_nse_range_boxplots = [0, 1.0]
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        plot_boxs(df,
                  "DAM_NUM_RANGE",
                  show_ind_key,
                  ylim=shown_nse_range_boxplots,
                  order=dam_num_range_lst)
        save_figure('NSE~DAM_NUM-boxplots-' + str(shown_nse_range_boxplots) +
                    '.png')
        nums_in_dor_range = [
            df[df["DOR_RANGE"] == a_range_rmp].shape[0]
            for a_range_rmp in dor_range_lst
        ]
        ratios_in_dor_range = [
            a_num / df.shape[0] for a_num in nums_in_dor_range
        ]
        hydro_logger.info(
            "the number and ratio of basins in each dor range\n: %s \n %s",
            nums_in_dor_range, ratios_in_dor_range)

        nums_in_dam_num_range = [
            df[df["DAM_NUM_RANGE"] == a_range_rmp].shape[0]
            for a_range_rmp in dam_num_range_lst
        ]
        ratios_in_dam_num_range = [
            a_num / df.shape[0] for a_num in nums_in_dam_num_range
        ]
        hydro_logger.info(
            "the number and ratio of basins in each dam_num range\n: %s \n %s",
            nums_in_dam_num_range, ratios_in_dam_num_range)

        # regplot
        plt.figure()
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        sns.regplot(x="DOR",
                    y=show_ind_key,
                    data=df[df[show_ind_key] >= 0],
                    scatter_kws={'s': 10})
        # Take the quantile of the DOR column only: df now also holds the
        # string columns DOR_RANGE / DAM_NUM_RANGE, on which
        # DataFrame.quantile fails in recent pandas versions.
        show_dor_max = df["DOR"].quantile(q=0.95)
        show_dor_min = min(dors)
        plt.ylim(0, 1)
        plt.xlim(show_dor_min, show_dor_max)
        save_figure('NSE~DOR-shown-max-' + str(show_dor_max) + '.png')

        # jointplot
        dor_range = [0.002, 0.2]
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        g = sns.jointplot(
            x="DOR",
            y=show_ind_key,
            data=df[(df["DOR"] < dor_range[1]) & (df["DOR"] > dor_range[0]) &
                    (df[show_ind_key] >= 0)],
            kind="hex",
            color="b")
        g.ax_marg_x.set_xlim(dor_range[0], dor_range[1])
        save_figure('NSE~DOR(range-)' + str(dor_range) + '-jointplot.png')

        nid_dir = os.path.join(
            "/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid",
            "test")
        nid_input = NidModel.load_nidmodel(
            nid_dir,
            nid_source_file_name='nid_source.txt',
            nid_data_file_name='nid_data.shp')
        gage_main_dam_purpose = unserialize_json(
            os.path.join(nid_dir, "dam_main_purpose_dict.json"))
        # NOTE(review): the object is not used afterwards; the call is kept
        # in case the constructor has side effects -- confirm.
        data_input = GagesDamDataModel(data_model, nid_input,
                                       gage_main_dam_purpose)
        dam_coords = unserialize_json_ordered(
            os.path.join(nid_dir, "dam_points_dict.json"))
        dam_storages = unserialize_json_ordered(
            os.path.join(nid_dir, "dam_storages_dict.json"))
        # the three dictionaries must list their keys in strictly increasing
        # order
        dam_ids_1 = list(gage_main_dam_purpose.keys())
        dam_ids_2 = list(dam_coords.keys())
        dam_ids_3 = list(dam_storages.keys())
        assert (all(x < y for x, y in zip(dam_ids_1, dam_ids_1[1:])))
        assert (all(x < y for x, y in zip(dam_ids_2, dam_ids_2[1:])))
        assert (all(x < y for x, y in zip(dam_ids_3, dam_ids_3[1:])))

        sites = list(dam_coords.keys())
        # idx_lst_nse_range: positions in gages_id of the ids that are also
        # keys of dam_coords
        _, _, idx_lst_nse_range = np.intersect1d(sites,
                                                 gages_id,
                                                 return_indices=True)

        std_storage_in_a_basin = list(map(np.std, dam_storages.values()))
        log_std_storage_in_a_basin = list(
            map(np.log,
                np.array(std_storage_in_a_basin) + 1))
        nse_values = self.inds_df["NSE"].values[idx_lst_nse_range]
        df = pd.DataFrame({
            "DAM_STORAGE_STD": log_std_storage_in_a_basin,
            show_ind_key: nse_values
        })
        plt.figure()
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        sns.regplot(x="DAM_STORAGE_STD",
                    y=show_ind_key,
                    data=df[df[show_ind_key] >= 0],
                    scatter_kws={'s': 10})
        show_max = max(log_std_storage_in_a_basin)
        show_min = min(log_std_storage_in_a_basin)
        if show_min < 0:
            show_min = 0
        plt.ylim(0, 1)
        plt.xlim(show_min, show_max)
        save_figure('NSE~' + "DAM_STORAGE_STD" + '.png')

        gages_loc_lat = data_model.data_source.gage_dict["LAT_GAGE"]
        gages_loc_lon = data_model.data_source.gage_dict["LNG_GAGE"]
        # Pick the gage locations with the same positions that select
        # nse_values, so that every gage is paired with its own dam basin
        # (assumes the gage_dict entries are ordered like gages_id, as the
        # positional indexing always did -- TODO confirm).
        gages_loc = [[gages_loc_lat[i], gages_loc_lon[i]]
                     for i in idx_lst_nse_range]
        # calculate index of dispersion, then plot the NSE-dispersion scatterplot
        # Geo coord system of gages_loc and dam_coords are both NAD83
        coefficient_of_var = list(
            map(coefficient_of_variation, gages_loc, dam_coords.values()))
        coefficient_of_var_min = min(coefficient_of_var)
        coefficient_of_var_max = max(coefficient_of_var)
        dispersion_var = "DAM_GAGE_DIS_VAR"
        nse_values = self.inds_df["NSE"].values[idx_lst_nse_range]
        df = pd.DataFrame({
            dispersion_var: coefficient_of_var,
            show_ind_key: nse_values
        })
        plt.figure()
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        sns.regplot(x=dispersion_var,
                    y=show_ind_key,
                    data=df[df[show_ind_key] >= 0],
                    scatter_kws={'s': 10})
        show_max = coefficient_of_var_max
        show_min = coefficient_of_var_min
        if show_min < 0:
            show_min = 0
        plt.ylim(0, 1)
        plt.xlim(show_min, show_max)
        save_figure('NSE~' + dispersion_var + '.png')

        idx_dispersions = list(
            map(ind_of_dispersion, gages_loc, dam_coords.values()))
        idx_dispersion_min = min(idx_dispersions)
        idx_dispersion_max = max(idx_dispersions)
        dispersion_var = "DAM_DISPERSION_BASIN"
        nse_values = self.inds_df["NSE"].values[idx_lst_nse_range]
        df = pd.DataFrame({
            dispersion_var: idx_dispersions,
            show_ind_key: nse_values
        })
        if idx_dispersion_min < 0:
            idx_dispersion_min = 0
        # The axis limits are applied to the joint plot only; setting them
        # through plt before the plot exists would alter the previous figure.
        sns.set(font="serif", font_scale=1.5, color_codes=True)
        g = sns.jointplot(x=dispersion_var,
                          y=show_ind_key,
                          data=df[df[show_ind_key] >= 0],
                          kind="reg")
        g.ax_marg_x.set_xlim(idx_dispersion_min, idx_dispersion_max)
        g.ax_marg_y.set_ylim(0, 1)
        plt.show()
Esempio n. 10
0
    def test_stor_seperate(self):
        """Compare NSE ECDFs of small- and large-storage basins.

        Two curves come from the ``basic/exp18`` result, split by the two
        ``STORAGE`` screenings ("together"); two more come from the
        experiments ``dam_exp12`` and ``dam_exp11`` ("separate"). All four
        are drawn with ``plot_ecdfs``.
        """
        config_dir = definitions.CONFIG_DIR
        config_file = os.path.join(config_dir, "basic/config_exp18.ini")
        subdir = r"basic/exp18"
        config_data = GagesConfig.set_subdir(config_file, subdir)
        data_model = GagesModel.load_datamodel(
            config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        all_sites = data_model.t_s_dict["sites_id"]
        storage_nor_1 = [0, 50]
        storage_nor_2 = [50, 15000]  # max is 14348.6581036888
        source_data_nor1 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            STORAGE=storage_nor_1)
        source_data_nor2 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            STORAGE=storage_nor_2)
        sites_id_nor1 = source_data_nor1.all_configs['flow_screen_gage_id']
        sites_id_nor2 = source_data_nor2.all_configs['flow_screen_gage_id']

        def positions_of(group):
            # Positions in all_sites of the ids contained in group; a set
            # keeps every membership test O(1).
            wanted = set(group)
            return [
                idx for idx, site in enumerate(all_sites) if site in wanted
            ]

        idx_lst_nor1 = positions_of(sites_id_nor1)
        idx_lst_nor2 = positions_of(sites_id_nor2)

        pred, obs = load_result(
            data_model.data_source.data_config.data_path['Temp'],
            self.test_epoch)
        pred = pred.reshape(pred.shape[0], pred.shape[1])
        # reshape obs with its own dimensions, not with those of pred
        obs = obs.reshape(obs.shape[0], obs.shape[1])
        inds = statError(obs, pred)
        inds_df = pd.DataFrame(inds)

        keys_nse = "NSE"
        xs = []
        ys = []
        cases_exps_legends_together = ["small_stor", "large_stor"]

        for idx_lst in (idx_lst_nor1, idx_lst_nor2):
            x_together, y_together = ecdf(inds_df[keys_nse].iloc[idx_lst])
            xs.append(x_together)
            ys.append(y_together)

        cases_exps = ["dam_exp12", "dam_exp11"]
        cases_exps_legends_separate = ["small_stor", "large_stor"]
        for case_exp in cases_exps:
            config_data_i = load_dataconfig_case_exp(case_exp)
            pred_i, obs_i = load_result(config_data_i.data_path['Temp'],
                                        self.test_epoch)
            pred_i = pred_i.reshape(pred_i.shape[0], pred_i.shape[1])
            obs_i = obs_i.reshape(obs_i.shape[0], obs_i.shape[1])
            inds_i = statError(obs_i, pred_i)
            x, y = ecdf(inds_i[keys_nse])
            xs.append(x)
            ys.append(y)

        plot_ecdfs(xs,
                   ys,
                   cases_exps_legends_together + cases_exps_legends_separate,
                   style=["together", "together", "separate", "separate"])
Esempio n. 11
0
    # NOTE(review): fragment of a longer function body; all_sites, inds_df,
    # the diversion_* / no_divert_* groups and the idx_lst_nodivert_* lists
    # are defined outside the visible lines.
    # positions in all_sites of the sites contained in each diversion group
    idx_lst_diversion_smalldor = [
        i for i in range(len(all_sites)) if all_sites[i] in diversion_small_dor
    ]
    idx_lst_diversion_largedor = [
        i for i in range(len(all_sites)) if all_sites[i] in diversion_large_dor
    ]

    keys_nse = "NSE"
    # xs / ys collect one ECDF curve per group, in the order of the legends
    xs = []
    ys = []
    cases_exps_legends_together = [
        "not_diverted_small_dor", "not_diverted_large_dor",
        "diversion_small_dor", "diversion_large_dor", "CONUS"
    ]

    # curve 1: rows selected through idx_lst_nodivert_smalldor
    x1, y1 = ecdf(inds_df[keys_nse].iloc[idx_lst_nodivert_smalldor])
    xs.append(x1)
    ys.append(y1)

    # curve 2: rows selected through idx_lst_nodivert_largedor
    x2, y2 = ecdf(inds_df[keys_nse].iloc[idx_lst_nodivert_largedor])
    xs.append(x2)
    ys.append(y2)

    # curve 3: rows selected through idx_lst_diversion_smalldor
    x3, y3 = ecdf(inds_df[keys_nse].iloc[idx_lst_diversion_smalldor])
    xs.append(x3)
    ys.append(y3)

    # curve 4: rows selected through idx_lst_diversion_largedor
    x4, y4 = ecdf(inds_df[keys_nse].iloc[idx_lst_diversion_largedor])
    xs.append(x4)
    ys.append(y4)