def test_forecast_data_temp_test(self):
    # cache the test-period data model for the simulation config under subdir "1"
    sim_config_data = self.sim_config_data
    sim_source_data = GagesSource(
        sim_config_data, sim_config_data.model_dict["data"]["tRangeTest"])
    sim_df = DataModel(sim_source_data)
    save_datamodel(sim_df, "1",
                   data_source_file_name='test_data_source.txt',
                   stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow',
                   forcing_file_name='test_forcing',
                   attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json',
                   var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    # cache the test-period data model for the main config under subdir "2"
    config_data = self.config_data
    source_data = GagesSource(config_data,
                              config_data.model_dict["data"]["tRangeTest"])
    df = DataModel(source_data)
    save_datamodel(df, "2",
                   data_source_file_name='test_data_source.txt',
                   stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow',
                   forcing_file_name='test_forcing',
                   attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json',
                   var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
def test_check_streamflow_data(self):
    source_data = GagesSource(
        self.config_data,
        self.config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False)
    t_range_list = hydro_time.t_range_days(["1990-01-01", "2010-01-01"])
    # data_temp = source_data.read_usge_gage("01", '01052500', t_range_list)
    data_temp = source_data.read_usge_gage("08", '08013000', t_range_list)
    print(data_temp)
    # show where the streamflow record has gaps (NaNs)
    print(np.argwhere(np.isnan(data_temp)))
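# For reference, hydro_time.t_range_days(["1990-01-01", "2010-01-01"]) builds the
# daily date axis used above. A standalone numpy sketch of the same idea (an
# assumption about its exact behavior: the end date is exclusive, matching
# np.arange semantics):
#
#     t_range_list = np.arange(np.datetime64("1990-01-01"),
#                              np.datetime64("2010-01-01"),
#                              np.timedelta64(1, "D"))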
def test_screen_some_gauge_and_save(self):
    config_dir = definitions.CONFIG_DIR
    config_file = os.path.join(config_dir, "transdata/config_exp12.ini")
    subdir = r"transdata/exp12"
    config_data = GagesConfig.set_subdir(config_file, subdir)
    # reference basins, chosen with the default config (self.config_data)
    ref_source_data = GagesSource.choose_some_basins(
        self.config_data,
        self.config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        ref="Ref")
    ref_sites_id = ref_source_data.all_configs['flow_screen_gage_id']
    ref_sites_id_df = pd.DataFrame({"STAID": ref_sites_id})
    dapeng_dir = os.path.join(self.config_data.data_path["DB"], "dapeng")
    if not os.path.isdir(dapeng_dir):
        os.makedirs(dapeng_dir)
    dapeng_v2_gageid_file = os.path.join(dapeng_dir, "v2.csv")
    ref_sites_id_df.to_csv(dapeng_v2_gageid_file, index=False)
    # basins without major dams, chosen with the exp12 config
    gages_model = GagesModels(config_data,
                              screen_basin_area_huc4=False,
                              major_dam_num=0)
    sites_id_df = pd.DataFrame(
        {"STAID": gages_model.data_model_train.t_s_dict["sites_id"]})
    dapeng_v1_gageid_file = os.path.join(dapeng_dir, "v1.csv")
    sites_id_df.to_csv(dapeng_v1_gageid_file, index=False)
    print("read and save data screen")
def test_trans_all_forcing_file_to_camels(self):
    data_source_dump = os.path.join(self.config_data.data_path["Temp"],
                                    'data_source.txt')
    source_data = unserialize_pickle(data_source_dump)
    output_dir = os.path.join(self.config_data.data_path["DB"],
                              "basin_mean_forcing", "daymet")
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    region_names = [
        region_temp.split("_")[-1]
        for region_temp in source_data.all_configs['regions']
    ]
    # the generated forcing data file is named "allref", so rename the "all" region
    region_names = ["allref" if r == "all" else r for r in region_names]
    year_start = int(source_data.t_range[0].split("-")[0])
    year_end = int(source_data.t_range[1].split("-")[0])
    years = np.arange(year_start, year_end)  # end year is exclusive
    # gage ids must be strictly increasing before the per-region transforms
    assert all(x < y for x, y in zip(source_data.gage_dict['STAID'],
                                     source_data.gage_dict['STAID'][1:]))
    config_dir = definitions.CONFIG_DIR
    for i in range(len(region_names)):
        config_file_i = os.path.join(
            config_dir, "transdata/config_exp" + str(i + 1) + ".ini")
        subdir_i = "transdata/exp" + str(i + 1)
        config_data_i = GagesConfig.set_subdir(config_file_i, subdir_i)
        source_data_i = GagesSource(
            config_data_i,
            config_data_i.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False)
        for year in years:
            trans_daymet_to_camels(source_data.all_configs["forcing_dir"],
                                   output_dir, source_data_i.gage_dict,
                                   region_names[i], year)
def test_explore_damcls_datamodel(self):
    config_data = self.config_data
    sites_id_dict = unserialize_json(
        "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/data/gages/nid/test/dam_main_purpose_dict.json")
    sites_id = list(sites_id_dict.keys())
    source_data_dor1 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        sites_id=sites_id)
    norsto = source_data_dor1.read_attr(sites_id, ["STOR_NOR_2009"],
                                        is_return_dict=False)
    df = pd.DataFrame({"GAGE_ID": sites_id, "STOR_NOR": norsto.flatten()})
    # df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"], '3557basins_NORSTOR.csv'),
    #           quoting=csv.QUOTE_NONNUMERIC, index=None)
    df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"],
                           '2909basins_NORSTOR.csv'),
              quoting=csv.QUOTE_NONNUMERIC, index=None)
def test_explore_dor_dam_num(self):
    config_data = self.config_data
    dor_2 = 0.1
    source_data_dor2 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor_2)
    sites_id_largedam = source_data_dor2.all_configs['flow_screen_gage_id']
    sites_id = np.intersect1d(np.array(self.sites_id),
                              np.array(sites_id_largedam)).tolist()
    norsto = source_data_dor2.read_attr(sites_id, ["STOR_NOR_2009"],
                                        is_return_dict=False)
    dam_num = source_data_dor2.read_attr(sites_id, ["NDAMS_2009"],
                                         is_return_dict=False)
    df = pd.DataFrame({"GAGE_ID": sites_id,
                       "STOR_NOR": norsto.flatten(),
                       "DAM_NUM": dam_num.flatten()})
    # note: sns.distplot is deprecated in seaborn >= 0.11;
    # sns.histplot(df["DAM_NUM"], bins=50) is the drop-in replacement
    sns.distplot(df["DAM_NUM"], bins=50)
    plt.show()
    df.to_csv(os.path.join(source_data_dor2.all_configs["out_dir"],
                           '1185largedor_basins_NORSTOR_DAMNUM.csv'),
              quoting=csv.QUOTE_NONNUMERIC, index=None)
def test_explore_(self):
    config_data = self.config_data
    sites_id_dict = unserialize_json(
        "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/data/gages/nid/test/dam_main_purpose_dict.json")
    sites_id = list(sites_id_dict.keys())
    source_data_dor1 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        sites_id=sites_id)
    nse_all = pd.read_csv(
        "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/output/gages/basic/exp37/3557basins_ID_NSE_DOR.csv",
        dtype={0: str})
    sites_ids = nse_all["GAUGE ID"].values
    # keep only the rows whose gauge appears in sites_id; this pairing assumes
    # the filtered csv rows come in the same order as sites_id
    idx = [i for i in range(len(sites_ids)) if sites_ids[i] in sites_id]
    df = pd.DataFrame({"GAGE_ID": sites_id,
                       "NSE": nse_all["NSE"].values[idx]})
    df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"],
                           '2909basins_NSE.csv'),
              quoting=csv.QUOTE_NONNUMERIC, index=None)
def test_read_sites_id_see_dor(self):
    exp_lst = ["exp18", "exp19", "exp20", "exp21", "exp22", "exp23"]
    sub_lst = ["0", "1"]
    diff_lst = [
        "dictTimeSpace.json", "test_dictTimeSpace.json",
        "test_dictTimeSpace_2.json"
    ]
    for exp_str in exp_lst:
        for sub_str in sub_lst:
            comp_sites = []
            for item in diff_lst:
                gage_id_file = os.path.join(
                    self.config_data.config_file["ROOT_DIR"], "temp",
                    "gages", "ecoregion", exp_str, sub_str, item)
                usgs_id = unserialize_json(gage_id_file)["sites_id"]
                assert all(x < y for x, y in zip(usgs_id, usgs_id[1:]))
                comp_sites.append(usgs_id)
                # RUNAVE7100: mm/year on a 1-km grid; STOR_NOR_2009: megaliters
                # of normal storage per sq km (1 megaliter = 1,000,000 liters
                # = 1,000 cubic meters)
                # attr_lst = ["RUNAVE7100", "STOR_NID_2009"]
                attr_lst = ["RUNAVE7100", "STOR_NOR_2009"]
                source_data = GagesSource.choose_some_basins(
                    self.config_data,
                    self.config_data.model_dict["data"]["tRangeTrain"],
                    screen_basin_area_huc4=False,
                    sites_id=usgs_id)
                data_attr, var_dict, f_dict = source_data.read_attr(
                    usgs_id, attr_lst)
                run_avg = data_attr[:, 0] * (10 ** (-3)) * (10 ** 6)  # m^3 per year
                nor_storage = data_attr[:, 1] * 1000  # m^3
                dors = nor_storage / run_avg
                results = [round(i, 3) for i in dors]
                hydro_logger.info(
                    exp_str + "-" + sub_str + "-" + item + " DOR: %s",
                    results)
            hydro_logger.info(
                "the intersection of each pair of sites: %s, %s, %s",
                np.intersect1d(comp_sites[0], comp_sites[1]),
                np.intersect1d(comp_sites[0], comp_sites[2]),
                np.intersect1d(comp_sites[1], comp_sites[2]))
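# The degree-of-regulation (DOR) arithmetic above, spelled out as a standalone
# sketch. Hedged: the helper below is illustrative, not part of the codebase,
# and assumes the units noted in the comments:
#
#     def dor_from_attrs(runave7100, stor_nor_2009):
#         """runave7100: mm/year on a 1-km grid; stor_nor_2009: megaliters/km^2."""
#         runoff_m3_per_yr = runave7100 * 1e-3 * 1e6  # mm -> m, times 1 km^2 in m^2
#         storage_m3 = stor_nor_2009 * 1e3            # megaliters -> m^3
#         return storage_m3 / runoff_m3_per_yr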
def test_gages_dam_stor_hist_basin(self):
    nid_dir = os.path.join(
        "/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid",
        "test")
    dam_storages = unserialize_json(
        os.path.join(nid_dir, "dam_storages_dict.json"))
    sites = np.array(list(dam_storages.keys()))
    dor_2 = 0.02
    source_data_dor2 = GagesSource.choose_some_basins(
        self.config_data,
        self.config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor_2)
    sites_id_largedam = source_data_dor2.all_configs['flow_screen_gage_id']
    c, ind1, idx_lst_nse_range = np.intersect1d(sites, sites_id_largedam,
                                                return_indices=True)
    # randomly pick four large-dor basins and plot per-basin histograms of
    # their dams' storages on a 2x2 grid
    num = 4
    num_lst = np.sort(np.random.choice(len(c), num, replace=False))
    chosen_sites = c[num_lst]
    hist_bins = 20
    fig = plt.figure(figsize=(8, 9))
    gs = gridspec.GridSpec(2, 2)
    for i in range(num):
        ax_k = plt.subplot(gs[int(i / 2), i % 2])
        ax_k.hist(dam_storages[chosen_sites[i]], hist_bins,
                  orientation='vertical', color='red', alpha=0.5)
    plt.show()
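# Note: np.random.choice above is unseeded, so a different set of four basins
# is drawn on every run. If reproducibility is wanted (an assumption; the
# original may prefer fresh samples), a seeded generator does the same job:
#
#     rng = np.random.default_rng(1234)
#     num_lst = np.sort(rng.choice(len(c), num, replace=False))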
def dam_lstm(args):
    update_cfg(cfg, args)
    random_seed = cfg.RANDOM_SEED
    test_epoch = cfg.TEST_EPOCH
    gpu_num = cfg.CTX
    train_mode = cfg.TRAIN_MODE
    dor = cfg.GAGES.attrScreenParams.DOR
    cache = cfg.CACHE.STATE
    print("train and test in basins with dams: \n")
    config_data = GagesConfig(cfg)
    source_data_dor1 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor)
    # basins with dams
    source_data_withdams = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=[1, 100000])
    sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
    sites_id_withdams = source_data_withdams.all_configs['flow_screen_gage_id']
    sites_id_chosen = np.intersect1d(np.array(sites_id_dor1),
                                     np.array(sites_id_withdams)).tolist()
    gages_model = GagesModels(config_data,
                              screen_basin_area_huc4=False,
                              sites_id=sites_id_chosen)
    gages_model_train = gages_model.data_model_train
    gages_model_test = gages_model.data_model_test
    if cache:
        save_datamodel(gages_model_train,
                       data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json',
                       flow_file_name='flow',
                       forcing_file_name='forcing',
                       attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model_test,
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
    with torch.cuda.device(gpu_num):
        if train_mode:
            master_train(gages_model_train, random_seed=random_seed)
        pred, obs = master_test(gages_model_test, epoch=test_epoch)
        basin_area = gages_model_test.data_source.read_attr(
            gages_model_test.t_s_dict["sites_id"], ['DRAIN_SQKM'],
            is_return_dict=False)
        mean_prep = gages_model_test.data_source.read_attr(
            gages_model_test.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
            is_return_dict=False)
        # PPTAVG_BASIN (cm/year) -> mm/day
        mean_prep = mean_prep / 365 * 10
        pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
        obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
        save_result(
            gages_model_test.data_source.data_config.data_path['Temp'],
            test_epoch, pred, obs)
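# A hedged sketch of how dam_lstm is typically driven: parse CLI arguments into
# `args`, then let update_cfg(cfg, args) override the global config. The cmd()
# helper named below is hypothetical; the real argument parser lives elsewhere
# in the repo:
#
#     if __name__ == "__main__":
#         args = cmd()  # hypothetical CLI parser returning a namespace
#         dam_lstm(args)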
def test_zero_small_dor_basins_locations(self):
    conus_exps = self.exp_lst
    test_epoch = self.test_epoch
    inds_df, pred, obs = load_ensemble_result(self.config_file, conus_exps,
                                              test_epoch, return_value=True)
    conus_config_data = load_dataconfig_case_exp(self.config_file,
                                                 conus_exps[0])
    conus_data_model = GagesModel.load_datamodel(
        conus_config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    conus_sites = conus_data_model.t_s_dict["sites_id"]
    all_lat = conus_data_model.data_source.gage_dict["LAT_GAGE"]
    all_lon = conus_data_model.data_source.gage_dict["LNG_GAGE"]
    show_ind_key = "NSE"
    attr_lst = ["SLOPE_PCT", "ELEV_MEAN_M_BASIN"]
    attrs = conus_data_model.data_source.read_attr(conus_sites, attr_lst,
                                                   is_return_dict=False)
    # keep only western basins (longitude < -100) with NSE in [0, 1)
    western_lon_idx = [i for i in range(all_lon.size) if all_lon[i] < -100]
    nse_range = [0, 1]
    idx_lst_nse = inds_df[
        (inds_df[show_ind_key] >= nse_range[0])
        & (inds_df[show_ind_key] < nse_range[1])].index.tolist()
    idx_lst_nse = np.intersect1d(western_lon_idx, idx_lst_nse)
    # small dor
    source_data_dor1 = GagesSource.choose_some_basins(
        conus_config_data,
        conus_config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=-self.dor)
    # basins with dams
    source_data_withdams = GagesSource.choose_some_basins(
        conus_config_data,
        conus_config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=[1, 10000])
    # basins without dams
    source_data_withoutdams = GagesSource.choose_some_basins(
        conus_config_data,
        conus_config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=0)
    sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
    sites_id_withdams = source_data_withdams.all_configs['flow_screen_gage_id']
    sites_id_nodam = source_data_withoutdams.all_configs['flow_screen_gage_id']
    sites_id_smalldam = np.intersect1d(np.array(sites_id_dor1),
                                       np.array(sites_id_withdams)).tolist()
    idx_lst_nodam_in_conus = [
        i for i in range(len(conus_sites))
        if conus_sites[i] in sites_id_nodam
    ]
    idx_lst_smalldam_in_conus = [
        i for i in range(len(conus_sites))
        if conus_sites[i] in sites_id_smalldam
    ]
    type_1_index_lst = np.intersect1d(idx_lst_nodam_in_conus,
                                      idx_lst_nse).tolist()
    type_2_index_lst = np.intersect1d(idx_lst_smalldam_in_conus,
                                      idx_lst_nse).tolist()
    pd.DataFrame({
        "GAGE_ID": np.array(conus_sites)[type_1_index_lst]
    }).to_csv(
        os.path.join(conus_config_data.data_path["Out"],
                     "western-zero-dor-sites.csv"))
    pd.DataFrame({
        "GAGE_ID": np.array(conus_sites)[type_2_index_lst]
    }).to_csv(
        os.path.join(conus_config_data.data_path["Out"],
                     "western-small-dor-sites.csv"))
    frame = []
    df_type1 = pd.DataFrame({
        "type": np.full(len(type_1_index_lst), "zero-dor"),
        show_ind_key: inds_df[show_ind_key].values[type_1_index_lst],
        "lat": all_lat[type_1_index_lst],
        "lon": all_lon[type_1_index_lst],
        "slope": attrs[type_1_index_lst, 0],
        "elevation": attrs[type_1_index_lst, 1]
    })
    frame.append(df_type1)
    df_type2 = pd.DataFrame({
        "type": np.full(len(type_2_index_lst), "small-dor"),
        show_ind_key: inds_df[show_ind_key].values[type_2_index_lst],
        "lat": all_lat[type_2_index_lst],
        "lon": all_lon[type_2_index_lst],
        "slope": attrs[type_2_index_lst, 0],
        "elevation": attrs[type_2_index_lst, 1]
    })
    frame.append(df_type2)
    data_df = pd.concat(frame)
    idx_lst = [
        np.arange(len(type_1_index_lst)),
        np.arange(len(type_1_index_lst),
                  len(type_1_index_lst) + len(type_2_index_lst))
    ]
    plot_gages_map_and_scatter(data_df,
                               [show_ind_key, "lat", "lon", "slope"],
                               idx_lst,
                               cmap_strs=["Reds", "Blues"],
                               labels=["zero-dor", "small-dor"],
                               scatter_label=[attr_lst[0], show_ind_key],
                               wspace=2, hspace=1.5, legend_y=.8,
                               sub_fig_ratio=[6, 4, 1])
    plt.tight_layout()
    plt.show()
def test_diff_dor(self):
    dor_1 = -self.dor
    dor_2 = self.dor
    test_epoch = self.test_epoch
    config_file = self.config_file
    conus_exps = ["basic_exp37"]
    pair1_exps = ["dam_exp1"]
    pair2_exps = ["nodam_exp7"]
    pair3_exps = ["dam_exp27"]
    nodam_exp_lst = ["nodam_exp1"]
    # -0.003["dam_exp11"] -0.08["dam_exp17"] -1["dam_exp32"]
    smalldam_exp_lst = ["dam_exp17"]
    # 0.003["dam_exp12"] 0.08["dam_exp18"] 1["dam_exp33"]
    largedam_exp_lst = ["dam_exp4"]
    pair1_config_data = load_dataconfig_case_exp(config_file, pair1_exps[0])
    pair2_config_data = load_dataconfig_case_exp(config_file, pair2_exps[0])
    pair3_config_data = load_dataconfig_case_exp(config_file, pair3_exps[0])
    conus_config_data = load_dataconfig_case_exp(config_file, conus_exps[0])
    conus_data_model = GagesModel.load_datamodel(
        conus_config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    conus_sites = conus_data_model.t_s_dict["sites_id"]
    source_data_dor1 = GagesSource.choose_some_basins(
        conus_config_data,
        conus_config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor_1)
    source_data_dor2 = GagesSource.choose_some_basins(
        conus_config_data,
        conus_config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor_2)
    # basins with dams
    source_data_withdams = GagesSource.choose_some_basins(
        conus_config_data,
        conus_config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=[1, 10000])
    # basins without dams
    source_data_withoutdams = GagesSource.choose_some_basins(
        conus_config_data,
        conus_config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=0)
    sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
    sites_id_withdams = source_data_withdams.all_configs['flow_screen_gage_id']
    sites_id_nodam = source_data_withoutdams.all_configs['flow_screen_gage_id']
    sites_id_smalldam = np.intersect1d(np.array(sites_id_dor1),
                                       np.array(sites_id_withdams)).tolist()
    sites_id_largedam = source_data_dor2.all_configs['flow_screen_gage_id']
    # sites_id_nolargedam = np.sort(np.union1d(np.array(sites_id_nodam), np.array(sites_id_largedam))).tolist()
    # pair1_sites = np.sort(np.intersect1d(np.array(sites_id_dor1), np.array(conus_sites))).tolist()
    # pair2_sites = np.sort(np.intersect1d(np.array(sites_id_nolargedam), np.array(conus_sites))).tolist()
    # pair3_sites = np.sort(np.intersect1d(np.array(sites_id_withdams), np.array(conus_sites))).tolist()
    pair1_data_model = GagesModel.load_datamodel(
        pair1_config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    pair1_sites = pair1_data_model.t_s_dict["sites_id"]
    pair2_data_model = GagesModel.load_datamodel(
        pair2_config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    pair2_sites = pair2_data_model.t_s_dict["sites_id"]
    pair3_data_model = GagesModel.load_datamodel(
        pair3_config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    pair3_sites = pair3_data_model.t_s_dict["sites_id"]
    idx_lst_nodam_in_pair1 = [
        i for i in range(len(pair1_sites)) if pair1_sites[i] in sites_id_nodam
    ]
    idx_lst_nodam_in_pair2 = [
        i for i in range(len(pair2_sites)) if pair2_sites[i] in sites_id_nodam
    ]
    idx_lst_nodam_in_pair3 = [
        i for i in range(len(pair3_sites)) if pair3_sites[i] in sites_id_nodam
    ]
    idx_lst_nodam_in_conus = [
        i for i in range(len(conus_sites)) if conus_sites[i] in sites_id_nodam
    ]
    idx_lst_smalldam_in_pair1 = [
        i for i in range(len(pair1_sites))
        if pair1_sites[i] in sites_id_smalldam
    ]
    idx_lst_smalldam_in_pair2 = [
        i for i in range(len(pair2_sites))
        if pair2_sites[i] in sites_id_smalldam
    ]
    idx_lst_smalldam_in_pair3 = [
        i for i in range(len(pair3_sites))
        if pair3_sites[i] in sites_id_smalldam
    ]
    idx_lst_smalldam_in_conus = [
        i for i in range(len(conus_sites))
        if conus_sites[i] in sites_id_smalldam
    ]
    idx_lst_largedam_in_pair1 = [
        i for i in range(len(pair1_sites))
        if pair1_sites[i] in sites_id_largedam
    ]
    idx_lst_largedam_in_pair2 = [
        i for i in range(len(pair2_sites))
        if pair2_sites[i] in sites_id_largedam
    ]
    idx_lst_largedam_in_pair3 = [
        i for i in range(len(pair3_sites))
        if pair3_sites[i] in sites_id_largedam
    ]
    idx_lst_largedam_in_conus = [
        i for i in range(len(conus_sites))
        if conus_sites[i] in sites_id_largedam
    ]
    print("multi box")
    inds_df_pair1 = load_ensemble_result(config_file, pair1_exps, test_epoch)
    inds_df_pair2 = load_ensemble_result(config_file, pair2_exps, test_epoch)
    inds_df_pair3 = load_ensemble_result(config_file, pair3_exps, test_epoch)
    inds_df_conus = load_ensemble_result(config_file, conus_exps, test_epoch)
    fig = plt.figure(figsize=(15, 8))
    gs = gridspec.GridSpec(1, 3)
    keys_nse = "NSE"
    color_chosen = ["Greens", "Blues", "Reds"]
    median_loc = 0.015
    decimal_places = 2
    sns.despine()
    sns.set(font_scale=1.5)
    attr_nodam = "zero_dor"
    cases_exps_legends_nodam = [
        "LSTM-Z", "LSTM-ZS", "LSTM-ZL", "LSTM-CONUS"
    ]
    frames_nodam = []
    inds_df_nodam = load_ensemble_result(config_file, nodam_exp_lst,
                                         test_epoch)
    df_nodam_alone = pd.DataFrame({
        attr_nodam: np.full([inds_df_nodam.shape[0]],
                            cases_exps_legends_nodam[0]),
        keys_nse: inds_df_nodam[keys_nse]
    })
    frames_nodam.append(df_nodam_alone)
    df_nodam_in_pair1 = pd.DataFrame({
        attr_nodam: np.full(
            [inds_df_pair1[keys_nse].iloc[idx_lst_nodam_in_pair1].shape[0]],
            cases_exps_legends_nodam[1]),
        keys_nse: inds_df_pair1[keys_nse].iloc[idx_lst_nodam_in_pair1]
    })
    frames_nodam.append(df_nodam_in_pair1)
    df_nodam_in_pair2 = pd.DataFrame({
        attr_nodam: np.full(
            [inds_df_pair2[keys_nse].iloc[idx_lst_nodam_in_pair2].shape[0]],
            cases_exps_legends_nodam[2]),
        keys_nse: inds_df_pair2[keys_nse].iloc[idx_lst_nodam_in_pair2]
    })
    frames_nodam.append(df_nodam_in_pair2)
    df_nodam_in_conus = pd.DataFrame({
        attr_nodam: np.full(
            [inds_df_conus[keys_nse].iloc[idx_lst_nodam_in_conus].shape[0]],
            cases_exps_legends_nodam[3]),
        keys_nse: inds_df_conus[keys_nse].iloc[idx_lst_nodam_in_conus]
    })
    frames_nodam.append(df_nodam_in_conus)
    result_nodam = pd.concat(frames_nodam)
    ax1 = plt.subplot(gs[0])
    # ax1.set_title("(a)")
    ax1.set_xticklabels(ax1.get_xticklabels(), rotation=30)
    ax1.set_ylim([0, 1])
    sns.boxplot(ax=ax1, x=attr_nodam, y=keys_nse, data=result_nodam,
                showfliers=False, palette=color_chosen[0])
    medians_nodam = result_nodam.groupby(
        [attr_nodam], sort=False)[keys_nse].median().values
    median_labels_nodam = [
        str(np.round(s, decimal_places)) for s in medians_nodam
    ]
    pos1 = range(len(medians_nodam))
    for tick, label in zip(pos1, ax1.get_xticklabels()):
        ax1.text(pos1[tick], medians_nodam[tick] + median_loc,
                 median_labels_nodam[tick], horizontalalignment='center',
                 size='x-small', weight='semibold')
    attr_smalldam = "small_dor"
    cases_exps_legends_smalldam = [
        "LSTM-S", "LSTM-ZS", "LSTM-SL", "LSTM-CONUS"
    ]
    frames_smalldam = []
    inds_df_smalldam = load_ensemble_result(config_file, smalldam_exp_lst,
                                            test_epoch)
    df_smalldam_alone = pd.DataFrame({
        attr_smalldam: np.full([inds_df_smalldam.shape[0]],
                               cases_exps_legends_smalldam[0]),
        keys_nse: inds_df_smalldam[keys_nse]
    })
    frames_smalldam.append(df_smalldam_alone)
    df_smalldam_in_pair1 = pd.DataFrame({
        attr_smalldam: np.full(
            [inds_df_pair1[keys_nse].iloc[idx_lst_smalldam_in_pair1].shape[0]],
            cases_exps_legends_smalldam[1]),
        keys_nse: inds_df_pair1[keys_nse].iloc[idx_lst_smalldam_in_pair1]
    })
    frames_smalldam.append(df_smalldam_in_pair1)
    df_smalldam_in_pair3 = pd.DataFrame({
        attr_smalldam: np.full(
            [inds_df_pair3[keys_nse].iloc[idx_lst_smalldam_in_pair3].shape[0]],
            cases_exps_legends_smalldam[2]),
        keys_nse: inds_df_pair3[keys_nse].iloc[idx_lst_smalldam_in_pair3]
    })
    frames_smalldam.append(df_smalldam_in_pair3)
    df_smalldam_in_conus = pd.DataFrame({
        attr_smalldam: np.full(
            [inds_df_conus[keys_nse].iloc[idx_lst_smalldam_in_conus].shape[0]],
            cases_exps_legends_smalldam[3]),
        keys_nse: inds_df_conus[keys_nse].iloc[idx_lst_smalldam_in_conus]
    })
    frames_smalldam.append(df_smalldam_in_conus)
    result_smalldam = pd.concat(frames_smalldam)
    ax2 = plt.subplot(gs[1])
    # ax2.set_title("(b)")
    ax2.set_xticklabels(ax2.get_xticklabels(), rotation=30)
    ax2.set_ylim([0, 1])
    ax2.set(ylabel=None)
    sns.boxplot(ax=ax2, x=attr_smalldam, y=keys_nse, data=result_smalldam,
                showfliers=False, palette=color_chosen[1])
    medians_smalldam = result_smalldam.groupby(
        [attr_smalldam], sort=False)[keys_nse].median().values
    median_labels_smalldam = [
        str(np.round(s, decimal_places)) for s in medians_smalldam
    ]
    pos2 = range(len(medians_smalldam))
    for tick, label in zip(pos2, ax2.get_xticklabels()):
        ax2.text(pos2[tick], medians_smalldam[tick] + median_loc,
                 median_labels_smalldam[tick], horizontalalignment='center',
                 size='x-small', weight='semibold')
    attr_largedam = "large_dor"
    cases_exps_legends_largedam = [
        "LSTM-L", "LSTM-ZL", "LSTM-SL", "LSTM-CONUS"
    ]
    frames_largedam = []
    inds_df_largedam = load_ensemble_result(config_file, largedam_exp_lst,
                                            test_epoch)
    df_largedam_alone = pd.DataFrame({
        attr_largedam: np.full([inds_df_largedam.shape[0]],
                               cases_exps_legends_largedam[0]),
        keys_nse: inds_df_largedam[keys_nse]
    })
    frames_largedam.append(df_largedam_alone)
    df_largedam_in_pair2 = pd.DataFrame({
        attr_largedam: np.full(
            [inds_df_pair2[keys_nse].iloc[idx_lst_largedam_in_pair2].shape[0]],
            cases_exps_legends_largedam[1]),
        keys_nse: inds_df_pair2[keys_nse].iloc[idx_lst_largedam_in_pair2]
    })
    frames_largedam.append(df_largedam_in_pair2)
    df_largedam_in_pair3 = pd.DataFrame({
        attr_largedam: np.full(
            [inds_df_pair3[keys_nse].iloc[idx_lst_largedam_in_pair3].shape[0]],
            cases_exps_legends_largedam[2]),
        keys_nse: inds_df_pair3[keys_nse].iloc[idx_lst_largedam_in_pair3]
    })
    frames_largedam.append(df_largedam_in_pair3)
    df_largedam_in_conus = pd.DataFrame({
        attr_largedam: np.full(
            [inds_df_conus[keys_nse].iloc[idx_lst_largedam_in_conus].shape[0]],
            cases_exps_legends_largedam[3]),
        keys_nse: inds_df_conus[keys_nse].iloc[idx_lst_largedam_in_conus]
    })
    frames_largedam.append(df_largedam_in_conus)
    result_largedam = pd.concat(frames_largedam)
    ax3 = plt.subplot(gs[2])
    # ax3.set_title("(c)")
    ax3.set_xticklabels(ax3.get_xticklabels(), rotation=30)
    ax3.set_ylim([0, 1])
    ax3.set(ylabel=None)
    sns.boxplot(ax=ax3, x=attr_largedam, y=keys_nse, data=result_largedam,
                showfliers=False, palette=color_chosen[2])
    medians_largedam = result_largedam.groupby(
        [attr_largedam], sort=False)[keys_nse].median().values
    median_labels_largedam = [
        str(np.round(s, decimal_places)) for s in medians_largedam
    ]
    pos3 = range(len(medians_largedam))
    for tick, label in zip(pos3, ax3.get_xticklabels()):
        ax3.text(pos3[tick], medians_largedam[tick] + median_loc,
                 median_labels_largedam[tick], horizontalalignment='center',
                 size='x-small', weight='semibold')
    # sns.despine()
    plt.tight_layout()
    plt.show()
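# The median-labeling loop above is repeated verbatim for ax1/ax2/ax3, and the
# `label` variable in those loops is never used. A hedged refactoring sketch
# (the helper name is illustrative, not part of the codebase):
#
#     def annotate_medians(ax, result_df, group_col, value_col,
#                          offset=0.015, decimals=2):
#         medians = result_df.groupby([group_col],
#                                     sort=False)[value_col].median().values
#         for pos, median in enumerate(medians):
#             ax.text(pos, median + offset, str(np.round(median, decimals)),
#                     horizontalalignment='center', size='x-small',
#                     weight='semibold')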