Example #1
    def test_forecast_data_temp_test(self):
        sim_config_data = self.sim_config_data
        sim_source_data = GagesSource(
            sim_config_data, sim_config_data.model_dict["data"]["tRangeTest"])
        sim_df = DataModel(sim_source_data)
        save_datamodel(sim_df,
                       "1",
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')

        config_data = self.config_data
        source_data = GagesSource(config_data,
                                  config_data.model_dict["data"]["tRangeTest"])
        df = DataModel(source_data)
        save_datamodel(df,
                       "2",
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
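
The save above has a mirror-image load used in later examples (Example 11 below shows the real call). A minimal sketch of restoring the first data model, assuming GagesModel.load_datamodel accepts the same "1" sub-directory tag that save_datamodel received; note the array files are referenced with a .npy suffix on load, since they were written via numpy, which appends that suffix:

# Sketch only: mirrors the save above; the "1" tag argument is an assumption.
sim_df_restored = GagesModel.load_datamodel(
    sim_config_data.data_path["Temp"], "1",
    data_source_file_name='test_data_source.txt',
    stat_file_name='test_Statistics.json',
    flow_file_name='test_flow.npy',
    forcing_file_name='test_forcing.npy',
    attr_file_name='test_attr.npy',
    f_dict_file_name='test_dictFactorize.json',
    var_dict_file_name='test_dictAttribute.json',
    t_s_dict_file_name='test_dictTimeSpace.json')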
Example #2
    def test_check_streamflow_data(self):
        source_data = GagesSource(
            self.config_data,
            self.config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False)
        t_range_list = hydro_time.t_range_days(["1990-01-01", "2010-01-01"])
        # data_temp = source_data.read_usge_gage("01", '01052500', t_range_list)
        data_temp = source_data.read_usge_gage("08", '08013000', t_range_list)
        print(data_temp)
        print(np.argwhere(np.isnan(data_temp)))
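
hydro_time.t_range_days builds the daily index that the gage read is aligned against. A minimal stand-in with the assumed behavior (a daily datetime64 array over the half-open range):

import numpy as np

# Assumed behavior of hydro_time.t_range_days: daily dates over [start, end).
def t_range_days_sketch(t_range):
    return np.arange(t_range[0], t_range[1], dtype="datetime64[D]")

dates = t_range_days_sketch(["1990-01-01", "2010-01-01"])
print(dates.size)  # 7305 days: 20 years including 5 leap days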
Example #3
    def test_screen_some_gauge_and_save(self):
        config_dir = definitions.CONFIG_DIR
        config_file = os.path.join(config_dir, "transdata/config_exp12.ini")
        subdir = r"transdata/exp12"
        config_data = GagesConfig.set_subdir(config_file, subdir)

        ref_source_data = GagesSource.choose_some_basins(
            self.config_data,
            self.config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            ref="Ref")
        ref_sites_id = ref_source_data.all_configs['flow_screen_gage_id']
        ref_sites_id_df = pd.DataFrame({"STAID": ref_sites_id})
        dapeng_dir = os.path.join(self.config_data.data_path["DB"], "dapeng")
        if not os.path.isdir(dapeng_dir):
            os.makedirs(dapeng_dir)
        dapeng_v2_gageid_file = os.path.join(dapeng_dir, "v2.csv")
        ref_sites_id_df.to_csv(dapeng_v2_gageid_file, index=False)

        gages_model = GagesModels(config_data,
                                  screen_basin_area_huc4=False,
                                  major_dam_num=0)
        sites_id_df = pd.DataFrame(
            {"STAID": gages_model.data_model_train.t_s_dict["sites_id"]})
        dapeng_v1_gageid_file = os.path.join(dapeng_dir, "v1.csv")
        sites_id_df.to_csv(dapeng_v1_gageid_file, index=False)

        print("read and save data screen")
Example #4
    def test_trans_all_forcing_file_to_camels(self):
        data_source_dump = os.path.join(self.config_data.data_path["Temp"],
                                        'data_source.txt')
        source_data = unserialize_pickle(data_source_dump)
        output_dir = os.path.join(self.config_data.data_path["DB"],
                                  "basin_mean_forcing", "daymet")
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)
        region_names = [
            region_temp.split("_")[-1]
            for region_temp in source_data.all_configs['regions']
        ]
        # the generated forcing data files use the name "allref", so rename region "all" accordingly
        region_names = ["allref" if r == "all" else r for r in region_names]
        year_start = int(source_data.t_range[0].split("-")[0])
        year_end = int(source_data.t_range[1].split("-")[0])
        years = np.arange(year_start, year_end)  # end-exclusive, matching the exclusive end of t_range
        assert (all(x < y for x, y in zip(source_data.gage_dict['STAID'],
                                          source_data.gage_dict['STAID'][1:])))

        config_dir = definitions.CONFIG_DIR
        for i in range(len(region_names)):
            config_file_i = os.path.join(
                config_dir, "transdata/config_exp" + str(i + 1) + ".ini")
            subdir_i = "transdata/exp" + str(i + 1)
            config_data_i = GagesConfig.set_subdir(config_file_i, subdir_i)
            source_data_i = GagesSource(
                config_data_i,
                config_data_i.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False)
            for year in years:
                trans_daymet_to_camels(source_data.all_configs["forcing_dir"],
                                       output_dir, source_data_i.gage_dict,
                                       region_names[i], year)
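
The generator-based sortedness assertion above is correct but iterates element by element in Python; an equivalent vectorized check (sketch, toy data; the comparison also works for numpy string arrays such as a STAID column) is:

import numpy as np

staid = np.array(["01013500", "01030500", "01052500"])  # toy, already sorted
assert bool(np.all(staid[:-1] < staid[1:]))  # strict ascending order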
Example #5
    def test_explore_damcls_datamodel(self):
        config_data = self.config_data
        sites_id_dict = unserialize_json(
            "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/data/gages/nid/test/dam_main_purpose_dict.json")
        sites_id = list(sites_id_dict.keys())
        source_data_dor1 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            sites_id=sites_id)
        norsto = source_data_dor1.read_attr(sites_id, ["STOR_NOR_2009"],
                                            is_return_dict=False)
        df = pd.DataFrame({"GAGE_ID": sites_id, "STOR_NOR": norsto.flatten()})
        # df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"], '3557basins_NORSTOR.csv'),
        #           quoting=csv.QUOTE_NONNUMERIC, index=None)
        df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"],
                               '2909basins_NORSTOR.csv'),
                  quoting=csv.QUOTE_NONNUMERIC, index=None)
Example #6
    def test_explore_dor_dam_num(self):
        config_data = self.config_data
        dor_2 = 0.1
        source_data_dor2 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor_2)
        sites_id_largedam = source_data_dor2.all_configs['flow_screen_gage_id']
        sites_id = np.intersect1d(np.array(self.sites_id),
                                  np.array(sites_id_largedam)).tolist()
        norsto = source_data_dor2.read_attr(sites_id, ["STOR_NOR_2009"],
                                            is_return_dict=False)
        dam_num = source_data_dor2.read_attr(sites_id, ["NDAMS_2009"],
                                             is_return_dict=False)
        df = pd.DataFrame({"GAGE_ID": sites_id, "STOR_NOR": norsto.flatten(),
                           "DAM_NUM": dam_num.flatten()})
        # df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"], '3557basins_NORSTOR.csv'),
        #           quoting=csv.QUOTE_NONNUMERIC, index=None)
        sns.distplot(df["DAM_NUM"], bins=50)
        plt.show()
        df.to_csv(os.path.join(source_data_dor2.all_configs["out_dir"],
                               '1185largedor_basins_NORSTOR_DAMNUM.csv'),
                  quoting=csv.QUOTE_NONNUMERIC, index=None)
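
sns.distplot has been deprecated since seaborn 0.11. A roughly equivalent call on current seaborn versions (sketch with toy data; histplot's defaults differ from distplot's, so the density scale and KDE are requested explicitly):

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

df = pd.DataFrame({"DAM_NUM": np.random.poisson(5, 1000)})  # toy data
sns.histplot(df["DAM_NUM"], bins=50, kde=True, stat="density")
plt.show()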
Example #7
    def test_explore_(self):
        config_data = self.config_data
        sites_id_dict = unserialize_json(
            "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/data/gages/nid/test/dam_main_purpose_dict.json")
        sites_id = list(sites_id_dict.keys())
        source_data_dor1 = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            sites_id=sites_id)
        nse_all = pd.read_csv(
            "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/output/gages/basic/exp37/3557basins_ID_NSE_DOR.csv",
            dtype={0: str})
        sites_ids = nse_all["GAUGE ID"].values
        idx = [i for i in range(len(sites_ids)) if sites_ids[i] in sites_id]
        # note: this pairing assumes sites_id and the filtered nse_all rows
        # are sorted consistently, so the NSE values align with sites_id
        df = pd.DataFrame({"GAGE_ID": sites_id, "NSE": nse_all["NSE"].values[idx]})
        # df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"], '3557basins_NORSTOR.csv'),
        #           quoting=csv.QUOTE_NONNUMERIC, index=None)
        df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"],
                               '2909basins_NSE.csv'),
                  quoting=csv.QUOTE_NONNUMERIC, index=None)
Example #8
    def test_read_sites_id_see_dor(self):
        exp_lst = ["exp18", "exp19", "exp20", "exp21", "exp22", "exp23"]
        sub_lst = ["0", "1"]
        diff_lst = [
            "dictTimeSpace.json", "test_dictTimeSpace.json",
            "test_dictTimeSpace_2.json"
        ]
        for exp_str in exp_lst:
            for sub_str in sub_lst:
                comp_sites = []
                for item in diff_lst:
                    gage_id_file = os.path.join(
                        self.config_data.config_file["ROOT_DIR"], "temp",
                        "gages", "ecoregion", exp_str, sub_str, item)
                    usgs_id = unserialize_json(gage_id_file)["sites_id"]
                    assert (all(x < y for x, y in zip(usgs_id, usgs_id[1:])))
                    comp_sites.append(usgs_id)
                    # RUNAVE7100: mean annual runoff, mm/year (1-km grid);
                    # STOR_NOR_2009: normalized storage, megaliters per sq km
                    # (1 megaliter = 1,000,000 liters = 1,000 cubic meters).
                    # Both are per-unit-area, so basin area cancels in DOR.
                    # attr_lst = ["RUNAVE7100", "STOR_NID_2009"]
                    attr_lst = ["RUNAVE7100", "STOR_NOR_2009"]
                    source_data = GagesSource.choose_some_basins(
                        self.config_data,
                        self.config_data.model_dict["data"]["tRangeTrain"],
                        screen_basin_area_huc4=False,
                        sites_id=usgs_id)
                    data_attr, var_dict, f_dict = source_data.read_attr(
                        usgs_id, attr_lst)
                    run_avg = data_attr[:, 0] * (10**(-3)) * (10**6)  # m^3 per year per sq km
                    nor_storage = data_attr[:, 1] * 1000  # m^3 per sq km
                    dors = nor_storage / run_avg
                    results = [round(i, 3) for i in dors]
                    hydro_logger.info(
                        exp_str + "-" + sub_str + "-" + item + " DOR: %s",
                        results)
                hydro_logger.info(
                    "the intersection of each pair of sites: %s, %s, %s",
                    np.intersect1d(comp_sites[0], comp_sites[1]),
                    np.intersect1d(comp_sites[0], comp_sites[2]),
                    np.intersect1d(comp_sites[1], comp_sites[2]))
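
A worked unit check for the DOR computed above, with hypothetical attribute values; because both GAGES-II attributes are per-unit-area, drainage area cancels and DOR comes out dimensionless (the fraction of one year's mean runoff held in storage):

run_mm_per_yr = 500.0    # RUNAVE7100: runoff depth, mm/year (hypothetical)
stor_ml_per_km2 = 120.0  # STOR_NOR_2009: storage, megaliters per sq km (hypothetical)

run_m3_per_yr = run_mm_per_yr * (10**(-3)) * (10**6)  # mm -> m depth, times 10^6 m^2 per km^2
stor_m3 = stor_ml_per_km2 * 1000                      # 1 megaliter = 1,000 m^3

dor = stor_m3 / run_m3_per_yr                         # per-km^2 factors cancel
print(round(dor, 3))  # 0.24: storage ~= 24% of one year's runoff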
Example #9
    def test_gages_dam_stor_hist_basin(self):
        nid_dir = os.path.join(
            "/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid",
            "test")
        dam_storages = unserialize_json(
            os.path.join(nid_dir, "dam_storages_dict.json"))

        sites = np.array(list(dam_storages.keys()))

        dor_2 = 0.02
        source_data_dor2 = GagesSource.choose_some_basins(
            self.config_data,
            self.config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor_2)
        sites_id_largedam = source_data_dor2.all_configs['flow_screen_gage_id']
        c, ind1, idx_lst_nse_range = np.intersect1d(sites,
                                                    sites_id_largedam,
                                                    return_indices=True)

        num = 4
        num_lst = np.sort(np.random.choice(len(c), num, replace=False))
        chosen_sites = c[num_lst]
        hist_bins = 20

        fig = plt.figure(figsize=(8, 9))
        gs = gridspec.GridSpec(2, 2)

        for i in range(num):
            ax_k = plt.subplot(gs[int(i / 2), i % 2])
            ax_k.hist(dam_storages[chosen_sites[i]],
                      hist_bins,
                      orientation='vertical',
                      color='red',
                      alpha=0.5)
        plt.show()
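
The four basins above are drawn with np.random.choice and no seed, so the histogram panel changes on every run; a seeded variant (sketch, toy site list) makes it reproducible:

import numpy as np

rng = np.random.default_rng(1234)  # fixed seed for a reproducible panel
c = np.array(["01013500", "01030500", "01052500", "08013000", "08014500"])  # toy
num = 4
num_lst = np.sort(rng.choice(len(c), num, replace=False))
chosen_sites = c[num_lst]
print(chosen_sites)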
Example #10
def dam_lstm(args):
    update_cfg(cfg, args)
    random_seed = cfg.RANDOM_SEED
    test_epoch = cfg.TEST_EPOCH
    gpu_num = cfg.CTX
    train_mode = cfg.TRAIN_MODE
    dor = cfg.GAGES.attrScreenParams.DOR
    cache = cfg.CACHE.STATE
    print("train and test in basins with dams: \n")
    config_data = GagesConfig(cfg)

    source_data_dor1 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor)
    # basins with dams
    source_data_withdams = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=[1, 100000])

    sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
    sites_id_withdams = source_data_withdams.all_configs['flow_screen_gage_id']
    sites_id_chosen = np.intersect1d(np.array(sites_id_dor1),
                                     np.array(sites_id_withdams)).tolist()

    gages_model = GagesModels(config_data,
                              screen_basin_area_huc4=False,
                              sites_id=sites_id_chosen)
    gages_model_train = gages_model.data_model_train
    gages_model_test = gages_model.data_model_test
    if cache:
        save_datamodel(gages_model_train,
                       data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json',
                       flow_file_name='flow',
                       forcing_file_name='forcing',
                       attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model_test,
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
    with torch.cuda.device(gpu_num):
        if train_mode:
            master_train(gages_model_train, random_seed=random_seed)
        pred, obs = master_test(gages_model_test, epoch=test_epoch)
        basin_area = gages_model_test.data_source.read_attr(
            gages_model_test.t_s_dict["sites_id"], ['DRAIN_SQKM'],
            is_return_dict=False)
        mean_prep = gages_model_test.data_source.read_attr(
            gages_model_test.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
            is_return_dict=False)
        mean_prep = mean_prep / 365 * 10  # PPTAVG_BASIN is cm/year; convert to mm/day
        pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
        obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
        save_result(gages_model_test.data_source.data_config.data_path['Temp'],
                    test_epoch, pred, obs)
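
The mean_prep / 365 * 10 step converts the GAGES-II PPTAVG_BASIN attribute (mean annual precipitation in cm) to mm/day before the inverse _basin_norm transform; a one-line sanity check with a hypothetical value:

pptavg_basin_cm_per_yr = 120.0  # hypothetical PPTAVG_BASIN value, cm/year
mean_prep_mm_per_day = pptavg_basin_cm_per_yr / 365 * 10
print(round(mean_prep_mm_per_day, 3))  # 3.288 mm/day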
Example #11
    def test_zero_small_dor_basins_locations(self):
        conus_exps = self.exp_lst
        test_epoch = self.test_epoch
        inds_df, pred, obs = load_ensemble_result(self.config_file,
                                                  conus_exps,
                                                  test_epoch,
                                                  return_value=True)
        conus_config_data = load_dataconfig_case_exp(self.config_file,
                                                     conus_exps[0])
        conus_data_model = GagesModel.load_datamodel(
            conus_config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        conus_sites = conus_data_model.t_s_dict["sites_id"]

        all_lat = conus_data_model.data_source.gage_dict["LAT_GAGE"]
        all_lon = conus_data_model.data_source.gage_dict["LNG_GAGE"]
        show_ind_key = "NSE"
        attr_lst = ["SLOPE_PCT", "ELEV_MEAN_M_BASIN"]
        attrs = conus_data_model.data_source.read_attr(conus_sites,
                                                       attr_lst,
                                                       is_return_dict=False)

        western_lon_idx = [i for i in range(all_lon.size) if all_lon[i] < -100]

        nse_range = [0, 1]
        idx_lst_nse = inds_df[(inds_df[show_ind_key] >= nse_range[0]) & (
            inds_df[show_ind_key] < nse_range[1])].index.tolist()
        idx_lst_nse = np.intersect1d(western_lon_idx, idx_lst_nse)

        # small dor
        source_data_dor1 = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=-self.dor)

        # basins with dams
        source_data_withdams = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=[1, 10000])
        # basins without dams
        source_data_withoutdams = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=0)

        sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
        sites_id_withdams = source_data_withdams.all_configs[
            'flow_screen_gage_id']

        sites_id_nodam = source_data_withoutdams.all_configs[
            'flow_screen_gage_id']
        sites_id_smalldam = np.intersect1d(
            np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()

        idx_lst_nodam_in_conus = [
            i for i in range(len(conus_sites))
            if conus_sites[i] in sites_id_nodam
        ]
        idx_lst_smalldam_in_conus = [
            i for i in range(len(conus_sites))
            if conus_sites[i] in sites_id_smalldam
        ]

        type_1_index_lst = np.intersect1d(idx_lst_nodam_in_conus,
                                          idx_lst_nse).tolist()
        type_2_index_lst = np.intersect1d(idx_lst_smalldam_in_conus,
                                          idx_lst_nse).tolist()
        pd.DataFrame({
            "GAGE_ID": np.array(conus_sites)[type_1_index_lst]
        }).to_csv(os.path.join(conus_config_data.data_path["Out"],
                               "western-zero-dor-sites.csv"))
        pd.DataFrame({
            "GAGE_ID": np.array(conus_sites)[type_2_index_lst]
        }).to_csv(os.path.join(conus_config_data.data_path["Out"],
                               "western-small-dor-sites.csv"))
        frame = []
        df_type1 = pd.DataFrame({
            "type": np.full(len(type_1_index_lst), "zero-dor"),
            show_ind_key: inds_df[show_ind_key].values[type_1_index_lst],
            "lat": all_lat[type_1_index_lst],
            "lon": all_lon[type_1_index_lst],
            "slope": attrs[type_1_index_lst, 0],
            "elevation": attrs[type_1_index_lst, 1]
        })
        frame.append(df_type1)
        df_type2 = pd.DataFrame({
            "type": np.full(len(type_2_index_lst), "small-dor"),
            show_ind_key: inds_df[show_ind_key].values[type_2_index_lst],
            "lat": all_lat[type_2_index_lst],
            "lon": all_lon[type_2_index_lst],
            "slope": attrs[type_2_index_lst, 0],
            "elevation": attrs[type_2_index_lst, 1]
        })
        frame.append(df_type2)
        data_df = pd.concat(frame)
        idx_lst = [
            np.arange(len(type_1_index_lst)),
            np.arange(len(type_1_index_lst),
                      len(type_1_index_lst) + len(type_2_index_lst))
        ]
        plot_gages_map_and_scatter(data_df,
                                   [show_ind_key, "lat", "lon", "slope"],
                                   idx_lst,
                                   cmap_strs=["Reds", "Blues"],
                                   labels=["zero-dor", "small-dor"],
                                   scatter_label=[attr_lst[0], show_ind_key],
                                   wspace=2,
                                   hspace=1.5,
                                   legend_y=.8,
                                   sub_fig_ratio=[6, 4, 1])
        plt.tight_layout()
        plt.show()
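
The list-comprehension membership scans above are O(len(conus_sites) x len(subset)); np.isin yields the same index lists in vectorized form (sketch with toy data):

import numpy as np

conus_sites = ["01013500", "01030500", "01052500", "08013000"]  # toy
sites_id_nodam = ["01030500", "08013000"]                       # toy
idx_lst_nodam_in_conus = np.where(
    np.isin(np.array(conus_sites), sites_id_nodam))[0].tolist()
print(idx_lst_nodam_in_conus)  # [1, 3]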
Example #12
    def test_diff_dor(self):
        dor_1 = -self.dor
        dor_2 = self.dor
        test_epoch = self.test_epoch
        config_file = self.config_file

        conus_exps = ["basic_exp37"]
        pair1_exps = ["dam_exp1"]
        pair2_exps = ["nodam_exp7"]
        pair3_exps = ["dam_exp27"]
        nodam_exp_lst = ["nodam_exp1"]
        # alternative small-dor runs by DOR threshold: -0.003 -> "dam_exp11", -0.08 -> "dam_exp17", -1 -> "dam_exp32"
        smalldam_exp_lst = ["dam_exp17"]
        # alternative large-dor runs by DOR threshold: 0.003 -> "dam_exp12", 0.08 -> "dam_exp18", 1 -> "dam_exp33"
        largedam_exp_lst = ["dam_exp4"]
        pair1_config_data = load_dataconfig_case_exp(config_file,
                                                     pair1_exps[0])
        pair2_config_data = load_dataconfig_case_exp(config_file,
                                                     pair2_exps[0])
        pair3_config_data = load_dataconfig_case_exp(config_file,
                                                     pair3_exps[0])
        conus_config_data = load_dataconfig_case_exp(config_file,
                                                     conus_exps[0])

        conus_data_model = GagesModel.load_datamodel(
            conus_config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        conus_sites = conus_data_model.t_s_dict["sites_id"]

        source_data_dor1 = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor_1)
        source_data_dor2 = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            DOR=dor_2)
        # basins with dams
        source_data_withdams = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=[1, 10000])
        # basins without dams
        source_data_withoutdams = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            dam_num=0)

        sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
        sites_id_withdams = source_data_withdams.all_configs[
            'flow_screen_gage_id']

        sites_id_nodam = source_data_withoutdams.all_configs[
            'flow_screen_gage_id']
        sites_id_smalldam = np.intersect1d(
            np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()
        sites_id_largedam = source_data_dor2.all_configs['flow_screen_gage_id']

        # sites_id_nolargedam = np.sort(np.union1d(np.array(sites_id_nodam), np.array(sites_id_largedam))).tolist()
        # pair1_sites = np.sort(np.intersect1d(np.array(sites_id_dor1), np.array(conus_sites))).tolist()
        # pair2_sites = np.sort(np.intersect1d(np.array(sites_id_nolargedam), np.array(conus_sites))).tolist()
        # pair3_sites = np.sort(np.intersect1d(np.array(sites_id_withdams), np.array(conus_sites))).tolist()

        pair1_data_model = GagesModel.load_datamodel(
            pair1_config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        pair1_sites = pair1_data_model.t_s_dict["sites_id"]
        pair2_data_model = GagesModel.load_datamodel(
            pair2_config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        pair2_sites = pair2_data_model.t_s_dict["sites_id"]
        pair3_data_model = GagesModel.load_datamodel(
            pair3_config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        pair3_sites = pair3_data_model.t_s_dict["sites_id"]

        idx_lst_nodam_in_pair1 = [
            i for i in range(len(pair1_sites))
            if pair1_sites[i] in sites_id_nodam
        ]
        idx_lst_nodam_in_pair2 = [
            i for i in range(len(pair2_sites))
            if pair2_sites[i] in sites_id_nodam
        ]
        idx_lst_nodam_in_pair3 = [
            i for i in range(len(pair3_sites))
            if pair3_sites[i] in sites_id_nodam
        ]
        idx_lst_nodam_in_conus = [
            i for i in range(len(conus_sites))
            if conus_sites[i] in sites_id_nodam
        ]

        idx_lst_smalldam_in_pair1 = [
            i for i in range(len(pair1_sites))
            if pair1_sites[i] in sites_id_smalldam
        ]
        idx_lst_smalldam_in_pair2 = [
            i for i in range(len(pair2_sites))
            if pair2_sites[i] in sites_id_smalldam
        ]
        idx_lst_smalldam_in_pair3 = [
            i for i in range(len(pair3_sites))
            if pair3_sites[i] in sites_id_smalldam
        ]
        idx_lst_smalldam_in_conus = [
            i for i in range(len(conus_sites))
            if conus_sites[i] in sites_id_smalldam
        ]

        idx_lst_largedam_in_pair1 = [
            i for i in range(len(pair1_sites))
            if pair1_sites[i] in sites_id_largedam
        ]
        idx_lst_largedam_in_pair2 = [
            i for i in range(len(pair2_sites))
            if pair2_sites[i] in sites_id_largedam
        ]
        idx_lst_largedam_in_pair3 = [
            i for i in range(len(pair3_sites))
            if pair3_sites[i] in sites_id_largedam
        ]
        idx_lst_largedam_in_conus = [
            i for i in range(len(conus_sites))
            if conus_sites[i] in sites_id_largedam
        ]

        print("multi box")
        inds_df_pair1 = load_ensemble_result(config_file, pair1_exps,
                                             test_epoch)
        inds_df_pair2 = load_ensemble_result(config_file, pair2_exps,
                                             test_epoch)
        inds_df_pair3 = load_ensemble_result(config_file, pair3_exps,
                                             test_epoch)
        inds_df_conus = load_ensemble_result(config_file, conus_exps,
                                             test_epoch)

        fig = plt.figure(figsize=(15, 8))
        gs = gridspec.GridSpec(1, 3)
        keys_nse = "NSE"
        color_chosen = ["Greens", "Blues", "Reds"]
        median_loc = 0.015
        decimal_places = 2
        sns.despine()
        sns.set(font_scale=1.5)

        attr_nodam = "zero_dor"
        cases_exps_legends_nodam = [
            "LSTM-Z", "LSTM-ZS", "LSTM-ZL", "LSTM-CONUS"
        ]
        frames_nodam = []
        inds_df_nodam = load_ensemble_result(config_file, nodam_exp_lst,
                                             test_epoch)
        df_nodam_alone = pd.DataFrame({
            attr_nodam:
            np.full([inds_df_nodam.shape[0]], cases_exps_legends_nodam[0]),
            keys_nse:
            inds_df_nodam[keys_nse]
        })
        frames_nodam.append(df_nodam_alone)

        df_nodam_in_pair1 = pd.DataFrame({
            attr_nodam:
            np.full([
                inds_df_pair1[keys_nse].iloc[idx_lst_nodam_in_pair1].shape[0]
            ], cases_exps_legends_nodam[1]),
            keys_nse:
            inds_df_pair1[keys_nse].iloc[idx_lst_nodam_in_pair1]
        })
        frames_nodam.append(df_nodam_in_pair1)

        df_nodam_in_pair2 = pd.DataFrame({
            attr_nodam:
            np.full([
                inds_df_pair2[keys_nse].iloc[idx_lst_nodam_in_pair2].shape[0]
            ], cases_exps_legends_nodam[2]),
            keys_nse:
            inds_df_pair2[keys_nse].iloc[idx_lst_nodam_in_pair2]
        })
        frames_nodam.append(df_nodam_in_pair2)

        df_nodam_in_conus = pd.DataFrame({
            attr_nodam:
            np.full([
                inds_df_conus[keys_nse].iloc[idx_lst_nodam_in_conus].shape[0]
            ], cases_exps_legends_nodam[3]),
            keys_nse:
            inds_df_conus[keys_nse].iloc[idx_lst_nodam_in_conus]
        })
        frames_nodam.append(df_nodam_in_conus)
        result_nodam = pd.concat(frames_nodam)
        ax1 = plt.subplot(gs[0])
        # ax1.set_title("(a)")
        ax1.set_xticklabels(ax1.get_xticklabels(), rotation=30)
        ax1.set_ylim([0, 1])
        sns.boxplot(ax=ax1,
                    x=attr_nodam,
                    y=keys_nse,
                    data=result_nodam,
                    showfliers=False,
                    palette=color_chosen[0])
        medians_nodam = result_nodam.groupby(
            [attr_nodam], sort=False)[keys_nse].median().values
        median_labels_nodam = [
            str(np.round(s, decimal_places)) for s in medians_nodam
        ]
        pos1 = range(len(medians_nodam))
        for tick, label in zip(pos1, ax1.get_xticklabels()):
            ax1.text(pos1[tick],
                     medians_nodam[tick] + median_loc,
                     median_labels_nodam[tick],
                     horizontalalignment='center',
                     size='x-small',
                     weight='semibold')

        attr_smalldam = "small_dor"
        cases_exps_legends_smalldam = [
            "LSTM-S", "LSTM-ZS", "LSTM-SL", "LSTM-CONUS"
        ]
        frames_smalldam = []
        inds_df_smalldam = load_ensemble_result(config_file, smalldam_exp_lst,
                                                test_epoch)
        df_smalldam_alone = pd.DataFrame({
            attr_smalldam:
            np.full([inds_df_smalldam.shape[0]],
                    cases_exps_legends_smalldam[0]),
            keys_nse:
            inds_df_smalldam[keys_nse]
        })
        frames_smalldam.append(df_smalldam_alone)

        df_smalldam_in_pair1 = pd.DataFrame({
            attr_smalldam:
            np.full([
                inds_df_pair1[keys_nse].iloc[idx_lst_smalldam_in_pair1].
                shape[0]
            ], cases_exps_legends_smalldam[1]),
            keys_nse:
            inds_df_pair1[keys_nse].iloc[idx_lst_smalldam_in_pair1]
        })
        frames_smalldam.append(df_smalldam_in_pair1)

        df_smalldam_in_pair3 = pd.DataFrame({
            attr_smalldam:
            np.full([
                inds_df_pair3[keys_nse].iloc[idx_lst_smalldam_in_pair3].
                shape[0]
            ], cases_exps_legends_smalldam[2]),
            keys_nse:
            inds_df_pair3[keys_nse].iloc[idx_lst_smalldam_in_pair3]
        })
        frames_smalldam.append(df_smalldam_in_pair3)

        df_smalldam_in_conus = pd.DataFrame({
            attr_smalldam:
            np.full([
                inds_df_conus[keys_nse].iloc[idx_lst_smalldam_in_conus].
                shape[0]
            ], cases_exps_legends_smalldam[3]),
            keys_nse:
            inds_df_conus[keys_nse].iloc[idx_lst_smalldam_in_conus]
        })
        frames_smalldam.append(df_smalldam_in_conus)
        result_smalldam = pd.concat(frames_smalldam)
        ax2 = plt.subplot(gs[1])
        # ax2.set_title("(b)")
        ax2.set_xticklabels(ax2.get_xticklabels(), rotation=30)
        ax2.set_ylim([0, 1])
        ax2.set(ylabel=None)
        sns.boxplot(ax=ax2,
                    x=attr_smalldam,
                    y=keys_nse,
                    data=result_smalldam,
                    showfliers=False,
                    palette=color_chosen[1])
        medians_smalldam = result_smalldam.groupby(
            [attr_smalldam], sort=False)[keys_nse].median().values
        median_labels_smalldam = [
            str(np.round(s, decimal_places)) for s in medians_smalldam
        ]
        pos2 = range(len(medians_smalldam))
        for tick, label in zip(pos2, ax2.get_xticklabels()):
            ax2.text(pos2[tick],
                     medians_smalldam[tick] + median_loc,
                     median_labels_smalldam[tick],
                     horizontalalignment='center',
                     size='x-small',
                     weight='semibold')

        attr_largedam = "large_dor"
        cases_exps_legends_largedam = [
            "LSTM-L", "LSTM-ZL", "LSTM-SL", "LSTM-CONUS"
        ]
        frames_largedam = []
        inds_df_largedam = load_ensemble_result(config_file, largedam_exp_lst,
                                                test_epoch)
        df_largedam_alone = pd.DataFrame({
            attr_largedam:
            np.full([inds_df_largedam.shape[0]],
                    cases_exps_legends_largedam[0]),
            keys_nse:
            inds_df_largedam[keys_nse]
        })
        frames_largedam.append(df_largedam_alone)

        df_largedam_in_pair2 = pd.DataFrame({
            attr_largedam:
            np.full([
                inds_df_pair2[keys_nse].iloc[idx_lst_largedam_in_pair2].
                shape[0]
            ], cases_exps_legends_largedam[1]),
            keys_nse:
            inds_df_pair2[keys_nse].iloc[idx_lst_largedam_in_pair2]
        })
        frames_largedam.append(df_largedam_in_pair2)

        df_largedam_in_pair3 = pd.DataFrame({
            attr_largedam:
            np.full([
                inds_df_pair3[keys_nse].iloc[idx_lst_largedam_in_pair3].
                shape[0]
            ], cases_exps_legends_largedam[2]),
            keys_nse:
            inds_df_pair3[keys_nse].iloc[idx_lst_largedam_in_pair3]
        })
        frames_largedam.append(df_largedam_in_pair3)

        df_largedam_in_conus = pd.DataFrame({
            attr_largedam:
            np.full([
                inds_df_conus[keys_nse].iloc[idx_lst_largedam_in_conus].
                shape[0]
            ], cases_exps_legends_largedam[3]),
            keys_nse:
            inds_df_conus[keys_nse].iloc[idx_lst_largedam_in_conus]
        })
        frames_largedam.append(df_largedam_in_conus)
        result_largedam = pd.concat(frames_largedam)
        ax3 = plt.subplot(gs[2])
        # ax3.set_title("(c)")
        ax3.set_xticklabels(ax3.get_xticklabels(), rotation=30)
        ax3.set_ylim([0, 1])
        ax3.set(ylabel=None)
        sns.boxplot(ax=ax3,
                    x=attr_largedam,
                    y=keys_nse,
                    data=result_largedam,
                    showfliers=False,
                    palette=color_chosen[2])
        medians_largedam = result_largedam.groupby(
            [attr_largedam], sort=False)[keys_nse].median().values
        median_labels_largedam = [
            str(np.round(s, decimal_places)) for s in medians_largedam
        ]
        pos3 = range(len(medians_largedam))
        for tick, label in zip(pos3, ax3.get_xticklabels()):
            ax3.text(pos3[tick],
                     medians_largedam[tick] + median_loc,
                     median_labels_largedam[tick],
                     horizontalalignment='center',
                     size='x-small',
                     weight='semibold')
        # sns.despine()
        plt.tight_layout()
        plt.show()
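
The three panels above repeat the same draw-and-annotate pattern; a small helper (sketch, name hypothetical) would remove the triplication without changing the output:

import numpy as np
import seaborn as sns

def boxplot_with_median_labels(ax, x, y, data, palette,
                               median_loc=0.015, decimal_places=2):
    """Draw one boxplot panel and print each group's median above its box."""
    ax.set_ylim([0, 1])
    sns.boxplot(ax=ax, x=x, y=y, data=data, showfliers=False, palette=palette)
    medians = data.groupby([x], sort=False)[y].median().values
    for pos, median in enumerate(medians):
        ax.text(pos, median + median_loc,
                str(np.round(median, decimal_places)),
                horizontalalignment='center', size='x-small', weight='semibold')

# e.g. boxplot_with_median_labels(ax1, attr_nodam, keys_nse, result_nodam, "Greens")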