def test_screen_some_gauge_and_save(self):
    """Export two gauge-id lists as CSVs under <DB>/dapeng.

    "v2.csv": ids of reference ("Ref") basins chosen from self.config_data;
    "v1.csv": training-site ids of a GagesModels built with no major dams
    (major_dam_num=0). Both files have a single "STAID" column.
    """
    config_dir = definitions.CONFIG_DIR
    config_file = os.path.join(config_dir, "transdata/config_exp12.ini")
    subdir = r"transdata/exp12"
    # NOTE(review): this local config_data (exp12) is used only for GagesModels
    # below, while the "Ref" screening uses self.config_data — confirm the mix
    # of the two configs is intentional.
    config_data = GagesConfig.set_subdir(config_file, subdir)
    ref_source_data = GagesSource.choose_some_basins(
        self.config_data,
        self.config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        ref="Ref")
    ref_sites_id = ref_source_data.all_configs['flow_screen_gage_id']
    ref_sites_id_df = pd.DataFrame({"STAID": ref_sites_id})
    dapeng_dir = os.path.join(self.config_data.data_path["DB"], "dapeng")
    if not os.path.isdir(dapeng_dir):
        os.makedirs(dapeng_dir)
    dapeng_v2_gageid_file = os.path.join(dapeng_dir, "v2.csv")
    ref_sites_id_df.to_csv(dapeng_v2_gageid_file, index=False)
    # basins without major dams, screened via the exp12 config
    gages_model = GagesModels(config_data,
                              screen_basin_area_huc4=False,
                              major_dam_num=0)
    sites_id_df = pd.DataFrame(
        {"STAID": gages_model.data_model_train.t_s_dict["sites_id"]})
    dapeng_v1_gageid_file = os.path.join(dapeng_dir, "v1.csv")
    sites_id_df.to_csv(dapeng_v1_gageid_file, index=False)
    print("read and save data screen")
def test_nonref_interscet_camels(self):
    """Print the CAMELS gauge ids that also appear in the Non-ref GAGES basins."""
    train_range = self.config_data.model_dict["data"]["tRangeTrain"]
    camels = CamelsSource(self.camels_config_data, train_range)
    nonref = GagesSource.choose_some_basins(self.config_data,
                                            train_range,
                                            ref="Non-ref")
    camels_id_arr = np.array(camels.gage_dict["id"])
    # intersect1d assumes nothing, but we rely on CAMELS ids being strictly increasing
    assert all(a < b for a, b in zip(camels_id_arr, camels_id_arr[1:]))
    nonref_id_arr = np.array(nonref.all_configs["flow_screen_gage_id"])
    overlap = np.intersect1d(camels_id_arr, nonref_id_arr)
    print(overlap)
def test_explore_damcls_datamodel(self):
    """Write normalized storage (STOR_NOR_2009) of the dam-purpose basins to a CSV."""
    cfg_data = self.config_data
    purpose_dict = unserialize_json(
        "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/data/gages/nid/test/dam_main_purpose_dict.json")
    basin_ids = list(purpose_dict.keys())
    chosen_source = GagesSource.choose_some_basins(
        cfg_data,
        cfg_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        sites_id=basin_ids)
    nor_stor = chosen_source.read_attr(basin_ids, ["STOR_NOR_2009"],
                                       is_return_dict=False)
    out_file = os.path.join(chosen_source.all_configs["out_dir"],
                            '2909basins_NORSTOR.csv')
    pd.DataFrame({
        "GAGE_ID": basin_ids,
        "STOR_NOR": nor_stor.flatten()
    }).to_csv(out_file, quoting=csv.QUOTE_NONNUMERIC, index=None)
def test_major_dam_interscet_camels(self):
    """Intersect CAMELS ids with no-major-dam GAGES basins, then split that
    overlap set by Ref / Non-ref classification; print sizes and ids."""
    # choose basins with major dams' num >= 1
    train_range = self.config_data.model_dict["data"]["tRangeTrain"]
    camels = CamelsSource(self.camels_config_data, train_range)
    no_major_dam = GagesSource.choose_some_basins(self.conus_config_data,
                                                  train_range,
                                                  screen_basin_area_huc4=False,
                                                  major_dam_num=0)
    camels_arr = np.array(camels.gage_dict["id"])
    # CAMELS id list must already be sorted strictly ascending
    assert all(a < b for a, b in zip(camels_arr, camels_arr[1:]))
    no_dam_arr = np.array(no_major_dam.all_configs["flow_screen_gage_id"])
    in_camels = np.intersect1d(camels_arr, no_dam_arr)
    print(in_camels.size)
    print(in_camels)
    # break the no-major-dam set down by reference classification
    for ref_kind in ('Ref', 'Non-ref'):
        ref_source = GagesSource.choose_some_basins(self.conus_config_data,
                                                    train_range,
                                                    screen_basin_area_huc4=False,
                                                    ref=ref_kind)
        ref_arr = np.array(ref_source.all_configs["flow_screen_gage_id"])
        overlap = np.intersect1d(no_dam_arr, ref_arr)
        print(overlap.size)
        print(overlap)
def test_explore_dor_dam_num(self):
    """Plot the dam-count histogram of large-DOR basins (DOR >= 0.1 among
    self.sites_id) and save their storage/dam-count table to a CSV."""
    cfg_data = self.config_data
    dor_threshold = 0.1
    largedor_source = GagesSource.choose_some_basins(
        cfg_data,
        cfg_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor_threshold)
    largedor_ids = largedor_source.all_configs['flow_screen_gage_id']
    # keep only the large-DOR basins that are also in this test's site list
    chosen_ids = np.intersect1d(np.array(self.sites_id),
                                np.array(largedor_ids)).tolist()
    nor_stor = largedor_source.read_attr(chosen_ids, ["STOR_NOR_2009"],
                                         is_return_dict=False)
    dam_counts = largedor_source.read_attr(chosen_ids, ["NDAMS_2009"],
                                           is_return_dict=False)
    summary_df = pd.DataFrame({
        "GAGE_ID": chosen_ids,
        "STOR_NOR": nor_stor.flatten(),
        "DAM_NUM": dam_counts.flatten()
    })
    sns.distplot(summary_df["DAM_NUM"], bins=50)
    plt.show()
    summary_df.to_csv(os.path.join(largedor_source.all_configs["out_dir"],
                                   '1185largedor_basins_NORSTOR_DAMNUM.csv'),
                      quoting=csv.QUOTE_NONNUMERIC, index=None)
def test_explore_(self):
    """Export each dam-purpose basin's NSE (from the exp37 results) to a CSV.

    Reads basin ids from the dam-main-purpose dict, selects those basins, looks
    up each id's NSE in the 3557-basin result table, and writes
    '2909basins_NSE.csv' under the source's output directory.
    """
    config_data = self.config_data
    sites_id_dict = unserialize_json(
        "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/data/gages/nid/test/dam_main_purpose_dict.json")
    sites_id = list(sites_id_dict.keys())
    source_data_dor1 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        sites_id=sites_id)
    nse_all = pd.read_csv(
        "/mnt/data/owen411/code/hydro-anthropogenic-lstm/example/output/gages/basic/exp37/3557basins_ID_NSE_DOR.csv",
        dtype={0: str})
    # BUG FIX: the original paired sites_id (dict-key order) with NSE rows taken
    # in CSV-row order, so GAGE_ID and NSE could be misaligned (and the two
    # columns could even differ in length when an id is missing from the CSV).
    # Build an explicit id -> NSE lookup instead.
    nse_by_site = dict(zip(nse_all["GAUGE ID"].values, nse_all["NSE"].values))
    matched_sites = [s for s in sites_id if s in nse_by_site]
    df = pd.DataFrame({
        "GAGE_ID": matched_sites,
        "NSE": [nse_by_site[s] for s in matched_sites]
    })
    df.to_csv(os.path.join(source_data_dor1.all_configs["out_dir"],
                           '2909basins_NSE.csv'),
              quoting=csv.QUOTE_NONNUMERIC, index=None)
def test_read_sites_id_see_dor(self):
    """For every ecoregion experiment/split, log each saved site list's DOR values
    (normalized storage / mean annual runoff) and the pairwise intersections of
    the three site lists (train, test, test_2)."""
    exp_lst = ["exp18", "exp19", "exp20", "exp21", "exp22", "exp23"]
    sub_lst = ["0", "1"]
    # the three cached site-list files per experiment/split
    diff_lst = [
        "dictTimeSpace.json", "test_dictTimeSpace.json",
        "test_dictTimeSpace_2.json"
    ]
    for exp_str in exp_lst:
        for sub_str in sub_lst:
            comp_sites = []
            for item in diff_lst:
                gage_id_file = os.path.join(
                    self.config_data.config_file["ROOT_DIR"], "temp", "gages",
                    "ecoregion", exp_str, sub_str, item)
                usgs_id = unserialize_json(gage_id_file)["sites_id"]
                # cached id lists must be strictly ascending
                assert (all(x < y for x, y in zip(usgs_id, usgs_id[1:])))
                comp_sites.append(usgs_id)
                # mm/year 1-km grid, megaliters total storage per sq km (1 megaliters = 1,000,000 liters = 1,000 cubic meters)
                # attr_lst = ["RUNAVE7100", "STOR_NID_2009"]
                attr_lst = ["RUNAVE7100", "STOR_NOR_2009"]
                source_data = GagesSource.choose_some_basins(
                    self.config_data,
                    self.config_data.model_dict["data"]["tRangeTrain"],
                    screen_basin_area_huc4=False,
                    sites_id=usgs_id)
                data_attr, var_dict, f_dict = source_data.read_attr(
                    usgs_id, attr_lst)
                # convert mm/year runoff to m^3 per year (over the 1-km grid cell)
                run_avg = data_attr[:, 0] * (10**(-3)) * (10**6)  # m^3 per year
                nor_storage = data_attr[:, 1] * 1000  # m^3
                dors = nor_storage / run_avg
                results = [round(i, 3) for i in dors]
                hydro_logger.info(
                    exp_str + "-" + sub_str + "-" + item + " DOR: %s", results)
            # after all three lists are read, report their pairwise overlaps
            hydro_logger.info(
                "the intersection of each pair of sites: %s, %s, %s",
                np.intersect1d(comp_sites[0], comp_sites[1]),
                np.intersect1d(comp_sites[0], comp_sites[2]),
                np.intersect1d(comp_sites[1], comp_sites[2]))
def test_some_reservoirs(self):
    """choose some small reservoirs to train and test"""
    # read the model configuration
    config_data = self.config_data
    # NOTE(review): docstring says "small reservoirs" but the screen is
    # major_dam=1 — confirm the intended selection in GagesSource.
    source_data = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        major_dam=1)
    sites_id = source_data.all_configs['flow_screen_gage_id']
    quick_data_dir = os.path.join(self.config_data.data_path["DB"], "quickdata")
    data_dir = os.path.join(quick_data_dir, "allnonref_85-05_nan-0.1_00-1.0")
    # load the cached train/test data models, then restrict both to sites_id
    data_model_train = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='data_source.txt',
        stat_file_name='Statistics.json',
        flow_file_name='flow.npy',
        forcing_file_name='forcing.npy',
        attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json',
        var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    data_model_test = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    gages_model_train = GagesModel.update_data_model(
        self.config_data, data_model_train, sites_id_update=sites_id)
    # test model is normalized with the training statistics
    gages_model_test = GagesModel.update_data_model(
        self.config_data,
        data_model_test,
        sites_id_update=sites_id,
        train_stat_dict=gages_model_train.stat_dict)
    save_datamodel(gages_model_train,
                   data_source_file_name='data_source.txt',
                   stat_file_name='Statistics.json',
                   flow_file_name='flow',
                   forcing_file_name='forcing',
                   attr_file_name='attr',
                   f_dict_file_name='dictFactorize.json',
                   var_dict_file_name='dictAttribute.json',
                   t_s_dict_file_name='dictTimeSpace.json')
    save_datamodel(gages_model_test,
                   data_source_file_name='test_data_source.txt',
                   stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow',
                   forcing_file_name='test_forcing',
                   attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json',
                   var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    print("read and save data model")
def test_gages_dam_stor_hist_basin(self):
    """Histogram per-dam storages for four randomly chosen large-DOR basins
    that also have entries in the dam-storages dict."""
    nid_dir = os.path.join(
        "/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid",
        "test")
    dam_storages = unserialize_json(
        os.path.join(nid_dir, "dam_storages_dict.json"))
    storage_sites = np.array(list(dam_storages.keys()))
    dor_threshold = 0.02
    largedam_source = GagesSource.choose_some_basins(
        self.config_data,
        self.config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor_threshold)
    largedam_ids = largedam_source.all_configs['flow_screen_gage_id']
    common_sites, _, _ = np.intersect1d(storage_sites,
                                        largedam_ids,
                                        return_indices=True)
    # pick 4 basins at random (no seeding here, so the pick varies per run)
    sample_size = 4
    picked_idx = np.sort(
        np.random.choice(len(common_sites), sample_size, replace=False))
    picked_sites = common_sites[picked_idx]
    bins_count = 20
    plt.figure(figsize=(8, 9))
    grid = gridspec.GridSpec(2, 2)
    for plot_i, site in enumerate(picked_sites):
        row, col = divmod(plot_i, 2)
        axis = plt.subplot(grid[row, col])
        axis.hist(dam_storages[site],
                  bins_count,
                  orientation='vertical',
                  color='red',
                  alpha=0.5)
    plt.show()
attr_file_name='test_attr.npy', f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json', t_s_dict_file_name='test_dictTimeSpace.json') camels531_gageid_file = os.path.join(zerodor_config_data.data_path["DB"], "camels531", "camels531.txt") gauge_df = pd.read_csv(camels531_gageid_file, dtype={"GaugeID": str}) gauge_list = gauge_df["GaugeID"].values all_sites_camels_531 = np.sort( [str(gauge).zfill(8) for gauge in gauge_list]) # basins without dams source_data_withoutdams = GagesSource.choose_some_basins( zerodor_config_data, zerodor_config_data.model_dict["data"]["tRangeTrain"], screen_basin_area_huc4=False, dam_num=0) sites_id_zerodor = source_data_withoutdams.all_configs[ 'flow_screen_gage_id'] sites_zero_dor_not_in_camels = [ sites_id_zerodor[i] for i in range(len(sites_id_zerodor)) if sites_id_zerodor[i] not in all_sites_camels_531 ] smalldor_config_data = load_dataconfig_case_exp( cfg, camels_pub_on_diff_dor_exp_lst[1]) source_data_dor1 = GagesSource.choose_some_basins( smalldor_config_data, smalldor_config_data.model_dict["data"]["tRangeTrain"], screen_basin_area_huc4=False,
def test_plot_map_cartopy_multi_vars(self):
    """Map + scatter plot comparing zero-dor and small-dor CONUS basins on NSE,
    location, slope and elevation, using the basic_exp37 ensemble results."""
    conus_exps = ["basic_exp37"]
    config_data = load_dataconfig_case_exp(cfg, conus_exps[0])
    # negative DOR means "DOR below |value|" in choose_some_basins
    dor_1 = -0.02
    source_data_dor1 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor_1)
    # basins with dams
    source_data_withdams = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=[1, 10000])
    # basins without dams
    source_data_withoutdams = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=0)
    sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
    sites_id_withdams = source_data_withdams.all_configs[
        'flow_screen_gage_id']
    sites_id_nodam = source_data_withoutdams.all_configs[
        'flow_screen_gage_id']
    # small-dor = low DOR but at least one dam
    sites_id_smalldam = np.intersect1d(
        np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()
    data_model = GagesModel.load_datamodel(
        config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    all_lat = data_model.data_source.gage_dict["LAT_GAGE"]
    all_lon = data_model.data_source.gage_dict["LNG_GAGE"]
    conus_sites = data_model.t_s_dict["sites_id"]
    # positional indices (into conus_sites) of each basin group
    idx_lst_nodam_in_conus = [
        i for i in range(len(conus_sites)) if conus_sites[i] in sites_id_nodam
    ]
    idx_lst_smalldam_in_conus = [
        i for i in range(len(conus_sites))
        if conus_sites[i] in sites_id_smalldam
    ]
    attr_lst = ["SLOPE_PCT", "ELEV_MEAN_M_BASIN"]
    attrs = data_model.data_source.read_attr(conus_sites,
                                             attr_lst,
                                             is_return_dict=False)
    test_epoch = 300
    inds_df, pred, obs = load_ensemble_result(cfg,
                                              conus_exps,
                                              test_epoch,
                                              return_value=True)
    show_ind_key = "NSE"
    # keep only basins whose NSE falls in [0, 1)
    nse_range = [0, 1]
    idx_lst_nse = inds_df[(inds_df[show_ind_key] >= nse_range[0]) & (
        inds_df[show_ind_key] < nse_range[1])].index.tolist()
    type_1_index_lst = np.intersect1d(idx_lst_nodam_in_conus,
                                      idx_lst_nse).tolist()
    type_2_index_lst = np.intersect1d(idx_lst_smalldam_in_conus,
                                      idx_lst_nse).tolist()
    frame = []
    df_type1 = pd.DataFrame({
        "type": np.full(len(type_1_index_lst), "zero-dor"),
        show_ind_key: inds_df[show_ind_key].values[type_1_index_lst],
        "lat": all_lat[type_1_index_lst],
        "lon": all_lon[type_1_index_lst],
        "slope": attrs[type_1_index_lst, 0],
        "elevation": attrs[type_1_index_lst, 1]
    })
    frame.append(df_type1)
    df_type2 = pd.DataFrame({
        "type": np.full(len(type_2_index_lst), "small-dor"),
        show_ind_key: inds_df[show_ind_key].values[type_2_index_lst],
        "lat": all_lat[type_2_index_lst],
        "lon": all_lon[type_2_index_lst],
        "slope": attrs[type_2_index_lst, 0],
        "elevation": attrs[type_2_index_lst, 1]
    })
    frame.append(df_type2)
    data_df = pd.concat(frame)
    # row ranges of each group inside the concatenated frame
    idx_lst = [
        np.arange(len(type_1_index_lst)),
        np.arange(len(type_1_index_lst),
                  len(type_1_index_lst) + len(type_2_index_lst))
    ]
    plot_gages_map_and_scatter(data_df,
                               [show_ind_key, "lat", "lon", "elevation"],
                               idx_lst,
                               cmap_strs=["Reds", "Blues"],
                               labels=["zero-dor", "small-dor"],
                               scatter_label=[attr_lst[1], show_ind_key])
    # matplotlib.rcParams.update({'font.size': 12})
    plt.tight_layout()
    plt.show()
conus_config_data.data_path["Temp"], data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json', flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy', f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json', t_s_dict_file_name='test_dictTimeSpace.json') conus_sites = conus_data_model.t_s_dict["sites_id"] dor_1 = -dor_cutoff dor_2 = dor_cutoff source_data_dor1 = GagesSource.choose_some_basins( conus_config_data, conus_config_data.model_dict["data"]["tRangeTrain"], screen_basin_area_huc4=False, DOR=dor_1) source_data_dor2 = GagesSource.choose_some_basins( conus_config_data, conus_config_data.model_dict["data"]["tRangeTrain"], screen_basin_area_huc4=False, DOR=dor_2) # basins with dams source_data_withdams = GagesSource.choose_some_basins( conus_config_data, conus_config_data.model_dict["data"]["tRangeTrain"], screen_basin_area_huc4=False, dam_num=[1, 10000]) # basins without dams source_data_withoutdams = GagesSource.choose_some_basins(
def test_some_reservoirs(self):
    """choose some small reservoirs randomly to train and test"""
    # read the model configuration
    config_data = self.config_data_1
    # according to paper "High-resolution mapping of the world's reservoirs and dams for sustainable river-flow management"
    # negative DOR means "DOR below |value|" in choose_some_basins
    dor = -0.02
    source_data = GagesSource.choose_some_basins(
        config_data, config_data.model_dict["data"]["tRangeTrain"], DOR=dor)
    sites_id = source_data.all_configs['flow_screen_gage_id']
    # data1 is historical data as input of LSTM-Inv, which will be a kernel for the second LSTM
    quick_data_dir = os.path.join(self.config_data_1.data_path["DB"],
                                  "quickdata")
    data_dir = os.path.join(quick_data_dir, "allnonref_85-05_nan-0.1_00-1.0")
    # for inv model, datamodel of train and test are same
    data_model_8595 = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='data_source.txt',
        stat_file_name='Statistics.json',
        flow_file_name='flow.npy',
        forcing_file_name='forcing.npy',
        attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json',
        var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    # for 2nd model, datamodel of train and test belong to parts of the test time
    data_model_9505 = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    t_range1_train = self.config_data_1.model_dict["data"]["tRangeTrain"]
    t_range1_test = self.config_data_1.model_dict["data"]["tRangeTest"]
    gages_model1_train = GagesModel.update_data_model(
        self.config_data_1,
        data_model_8595,
        sites_id_update=sites_id,
        t_range_update=t_range1_train,
        data_attr_update=True)
    # Because we know data of period "90-95", so that we can get its statistics according to this period
    gages_model1_test = GagesModel.update_data_model(
        self.config_data_1,
        data_model_8595,
        sites_id_update=sites_id,
        t_range_update=t_range1_test,
        data_attr_update=True)
    t_range2_train = self.config_data_2.model_dict["data"]["tRangeTrain"]
    t_range2_test = self.config_data_2.model_dict["data"]["tRangeTest"]
    gages_model2_train = GagesModel.update_data_model(
        self.config_data_2,
        data_model_8595,
        sites_id_update=sites_id,
        t_range_update=t_range2_train,
        data_attr_update=True)
    # 2nd model's test set is normalized with its training statistics
    gages_model2_test = GagesModel.update_data_model(
        self.config_data_2,
        data_model_9505,
        sites_id_update=sites_id,
        t_range_update=t_range2_test,
        data_attr_update=True,
        train_stat_dict=gages_model2_train.stat_dict)
    # cache both models' train/test data under subdirs "1" and "2"
    save_datamodel(gages_model1_train, "1",
                   data_source_file_name='data_source.txt',
                   stat_file_name='Statistics.json',
                   flow_file_name='flow',
                   forcing_file_name='forcing',
                   attr_file_name='attr',
                   f_dict_file_name='dictFactorize.json',
                   var_dict_file_name='dictAttribute.json',
                   t_s_dict_file_name='dictTimeSpace.json')
    save_datamodel(gages_model1_test, "1",
                   data_source_file_name='test_data_source.txt',
                   stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow',
                   forcing_file_name='test_forcing',
                   attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json',
                   var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    save_datamodel(gages_model2_train, "2",
                   data_source_file_name='data_source.txt',
                   stat_file_name='Statistics.json',
                   flow_file_name='flow',
                   forcing_file_name='forcing',
                   attr_file_name='attr',
                   f_dict_file_name='dictFactorize.json',
                   var_dict_file_name='dictAttribute.json',
                   t_s_dict_file_name='dictTimeSpace.json')
    save_datamodel(gages_model2_test, "2",
                   data_source_file_name='test_data_source.txt',
                   stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow',
                   forcing_file_name='test_forcing',
                   attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json',
                   var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    print("read and save data model")
def pub_lstm(args):
    """Train/test LSTM models for PUB (prediction in ungauged basins) experiments.

    Basins are split into train/test groups by cfg.PUB_PLAN:
      0          CAMELS-531 basins train, the rest test;
      1/4        zero-dam vs. small-dor (with dams) basins, and the reverse;
      2/5        zero-dam vs. large-dor basins, and the reverse;
      3/6        small-dor vs. large-dor basins, and the reverse.
    cfg.PLUS controls the k-fold scheme (ecoregion-stratified variants for
    0 / -1 / other; a plain split for -2). When cfg.CACHE.STATE is set, the
    per-split data models are built and cached first; then models are trained
    (if cfg.TRAIN_MODE) and evaluated on GPU cfg.CTX.
    """
    update_cfg(cfg, args)
    random_seed = cfg.RANDOM_SEED
    test_epoch = cfg.TEST_EPOCH
    gpu_num = cfg.CTX
    train_mode = cfg.TRAIN_MODE
    cache = cfg.CACHE.STATE
    pub_plan = cfg.PUB_PLAN
    plus = cfg.PLUS
    dor = cfg.GAGES.attrScreenParams.DOR
    split_num = cfg.SPLIT_NUM
    print("train and test for PUB: \n")
    config_data = GagesConfig(cfg)
    if cache:
        # level-2 ecoregion codes used to stratify the k-fold splits
        eco_names = [("ECO2_CODE", 5.2), ("ECO2_CODE", 5.3),
                     ("ECO2_CODE", 6.2), ("ECO2_CODE", 7.1),
                     ("ECO2_CODE", 8.1), ("ECO2_CODE", 8.2),
                     ("ECO2_CODE", 8.3), ("ECO2_CODE", 8.4),
                     ("ECO2_CODE", 8.5), ("ECO2_CODE", 9.2),
                     ("ECO2_CODE", 9.3), ("ECO2_CODE", 9.4),
                     ("ECO2_CODE", 9.5), ("ECO2_CODE", 9.6),
                     ("ECO2_CODE", 10.1), ("ECO2_CODE", 10.2),
                     ("ECO2_CODE", 10.4), ("ECO2_CODE", 11.1),
                     ("ECO2_CODE", 12.1), ("ECO2_CODE", 13.1)]
        quick_data_dir = os.path.join(config_data.data_path["DB"],
                                      "quickdata")
        data_dir = os.path.join(quick_data_dir,
                                "conus-all_90-10_nan-0.0_00-1.0")
        data_model_train = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='data_source.txt',
            stat_file_name='Statistics.json',
            flow_file_name='flow.npy',
            forcing_file_name='forcing.npy',
            attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json',
            var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model_test = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        conus_sites_id = data_model_train.t_s_dict["sites_id"]
        if pub_plan == 0:
            """do a pub test like freddy's"""
            camels531_gageid_file = os.path.join(config_data.data_path["DB"],
                                                 "camels531", "camels531.txt")
            gauge_df = pd.read_csv(camels531_gageid_file,
                                   dtype={"GaugeID": str})
            gauge_list = gauge_df["GaugeID"].values
            all_sites_camels_531 = np.sort(
                [str(gauge).zfill(8) for gauge in gauge_list])
            sites_id_train = np.intersect1d(conus_sites_id,
                                            all_sites_camels_531)
            # basins not in CAMELS
            sites_id_test = [
                a_temp_site for a_temp_site in conus_sites_id
                if a_temp_site not in all_sites_camels_531
            ]
            assert (all(x < y
                        for x, y in zip(sites_id_test, sites_id_test[1:])))
        elif pub_plan == 1 or pub_plan == 4:
            # DOR=-dor keeps basins with DOR below the cutoff
            source_data_dor1 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=-dor)
            # basins with dams
            source_data_withdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=[1, 100000])
            # basins without dams
            source_data_withoutdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=0)
            sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
            sites_id_withdams = source_data_withdams.all_configs[
                'flow_screen_gage_id']
            if pub_plan == 1:
                # train on zero-dam basins, test on small-dor basins with dams
                sites_id_train = source_data_withoutdams.all_configs[
                    'flow_screen_gage_id']
                sites_id_test = np.intersect1d(
                    np.array(sites_id_dor1),
                    np.array(sites_id_withdams)).tolist()
            else:
                # plan 4: the reverse of plan 1
                sites_id_train = np.intersect1d(
                    np.array(sites_id_dor1),
                    np.array(sites_id_withdams)).tolist()
                sites_id_test = source_data_withoutdams.all_configs[
                    'flow_screen_gage_id']
        elif pub_plan == 2 or pub_plan == 5:
            # positive DOR keeps basins at/above the cutoff (large dor)
            source_data_dor1 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=dor)
            # basins without dams
            source_data_withoutdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=0)
            if pub_plan == 2:
                sites_id_train = source_data_withoutdams.all_configs[
                    'flow_screen_gage_id']
                sites_id_test = source_data_dor1.all_configs[
                    'flow_screen_gage_id']
            else:
                # plan 5: the reverse of plan 2
                sites_id_train = source_data_dor1.all_configs[
                    'flow_screen_gage_id']
                sites_id_test = source_data_withoutdams.all_configs[
                    'flow_screen_gage_id']
        elif pub_plan == 3 or pub_plan == 6:
            dor_1 = -dor
            dor_2 = dor
            source_data_dor1 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=dor_1)
            # basins with dams
            source_data_withdams = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                dam_num=[1, 100000])
            sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
            sites_id_withdams = source_data_withdams.all_configs[
                'flow_screen_gage_id']
            source_data_dor2 = GagesSource.choose_some_basins(
                config_data,
                config_data.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False,
                DOR=dor_2)
            if pub_plan == 3:
                # train on small-dor basins with dams, test on large-dor basins
                sites_id_train = np.intersect1d(
                    np.array(sites_id_dor1),
                    np.array(sites_id_withdams)).tolist()
                sites_id_test = source_data_dor2.all_configs[
                    'flow_screen_gage_id']
            else:
                # plan 6: the reverse of plan 3
                sites_id_train = source_data_dor2.all_configs[
                    'flow_screen_gage_id']
                sites_id_test = np.intersect1d(
                    np.array(sites_id_dor1),
                    np.array(sites_id_withdams)).tolist()
        else:
            print("wrong plan")
            sites_id_train = None
            sites_id_test = None
        train_sites_in_conus = np.intersect1d(conus_sites_id, sites_id_train)
        test_sites_in_conus = np.intersect1d(conus_sites_id, sites_id_test)
        if plus == 0:
            all_index_lst_train_1 = []
            # all sites come from train1 dataset
            sites_lst_train = []
            all_index_lst_test_1 = []
            sites_lst_test_1 = []
            all_index_lst_test_2 = []
            sites_lst_test_2 = []
            np.random.seed(random_seed)
            kf = KFold(n_splits=split_num,
                       shuffle=True,
                       random_state=random_seed)
            eco_name_chosen = []
            for eco_name in eco_names:
                eco_source_data = GagesSource.choose_some_basins(
                    config_data,
                    config_data.model_dict["data"]["tRangeTrain"],
                    screen_basin_area_huc4=False,
                    ecoregion=eco_name)
                eco_sites_id = eco_source_data.all_configs[
                    'flow_screen_gage_id']
                train_sites_id_inter = np.intersect1d(train_sites_in_conus,
                                                      eco_sites_id)
                test_sites_id_inter = np.intersect1d(test_sites_in_conus,
                                                     eco_sites_id)
                # skip ecoregions too small to support a k-fold split
                if train_sites_id_inter.size < split_num or test_sites_id_inter.size < 1:
                    continue
                for train, test in kf.split(train_sites_id_inter):
                    all_index_lst_train_1.append(train)
                    sites_lst_train.append(train_sites_id_inter[train])
                    all_index_lst_test_1.append(test)
                    sites_lst_test_1.append(train_sites_id_inter[test])
                    # test-2 set: sample from the test group, matching fold size
                    if test_sites_id_inter.size < test.size:
                        all_index_lst_test_2.append(
                            np.arange(test_sites_id_inter.size))
                        sites_lst_test_2.append(test_sites_id_inter)
                    else:
                        test2_chosen_idx = np.random.choice(
                            test_sites_id_inter.size,
                            test.size,
                            replace=False)
                        all_index_lst_test_2.append(test2_chosen_idx)
                        sites_lst_test_2.append(
                            test_sites_id_inter[test2_chosen_idx])
                eco_name_chosen.append(eco_name)
        elif plus == -1:
            print("camels pub, only do pub on the camels basins")
            all_index_lst_train_1 = []
            # all sites come from train1 dataset
            sites_lst_train = []
            all_index_lst_test_1 = []
            sites_lst_test_1 = []
            np.random.seed(random_seed)
            kf = KFold(n_splits=split_num,
                       shuffle=True,
                       random_state=random_seed)
            eco_name_chosen = []
            for eco_name in eco_names:
                eco_source_data = GagesSource.choose_some_basins(
                    config_data,
                    config_data.model_dict["data"]["tRangeTrain"],
                    screen_basin_area_huc4=False,
                    ecoregion=eco_name)
                eco_sites_id = eco_source_data.all_configs[
                    'flow_screen_gage_id']
                train_sites_id_inter = np.intersect1d(train_sites_in_conus,
                                                      eco_sites_id)
                if train_sites_id_inter.size < split_num:
                    continue
                for train, test in kf.split(train_sites_id_inter):
                    all_index_lst_train_1.append(train)
                    sites_lst_train.append(train_sites_id_inter[train])
                    all_index_lst_test_1.append(test)
                    sites_lst_test_1.append(train_sites_id_inter[test])
                eco_name_chosen.append(eco_name)
        elif plus == -2:
            print(
                "camels pub, only do pub on the camels basins, same with freddy's split method"
            )
            all_index_lst_train_1 = []
            # all sites come from train1 dataset
            sites_lst_train = []
            all_index_lst_test_1 = []
            sites_lst_test_1 = []
            np.random.seed(random_seed)
            kf = KFold(n_splits=split_num,
                       shuffle=True,
                       random_state=random_seed)
            # plain (non-ecoregion) k-fold over all training sites
            for train, test in kf.split(train_sites_in_conus):
                all_index_lst_train_1.append(train)
                sites_lst_train.append(train_sites_in_conus[train])
                all_index_lst_test_1.append(test)
                sites_lst_test_1.append(train_sites_in_conus[test])
        else:
            sites_lst_train = []
            sites_lst_test_1 = []
            sites_lst_test_2 = []
            np.random.seed(random_seed)
            kf = KFold(n_splits=split_num,
                       shuffle=True,
                       random_state=random_seed)
            eco_name_chosen = []
            for eco_name in eco_names:
                eco_source_data = GagesSource.choose_some_basins(
                    config_data,
                    config_data.model_dict["data"]["tRangeTrain"],
                    screen_basin_area_huc4=False,
                    ecoregion=eco_name)
                eco_sites_id = eco_source_data.all_configs[
                    'flow_screen_gage_id']
                sites_id_inter_1 = np.intersect1d(train_sites_in_conus,
                                                  eco_sites_id)
                sites_id_inter_2 = np.intersect1d(test_sites_in_conus,
                                                  eco_sites_id)
                # k-fold the smaller group; sample the other to matching sizes
                if sites_id_inter_1.size < sites_id_inter_2.size:
                    if sites_id_inter_1.size < split_num:
                        continue
                    for train, test in kf.split(sites_id_inter_1):
                        sites_lst_train_1 = sites_id_inter_1[train]
                        sites_lst_test_1.append(sites_id_inter_1[test])
                        chosen_lst_2 = random_choice_no_return(
                            sites_id_inter_2, [train.size, test.size])
                        sites_lst_train_2 = chosen_lst_2[0]
                        sites_lst_test_2.append(chosen_lst_2[1])
                        sites_lst_train.append(
                            np.sort(
                                np.append(sites_lst_train_1,
                                          sites_lst_train_2)))
                else:
                    if sites_id_inter_2.size < split_num:
                        continue
                    for train, test in kf.split(sites_id_inter_2):
                        sites_lst_train_2 = sites_id_inter_2[train]
                        sites_lst_test_2.append(sites_id_inter_2[test])
                        chosen_lst_1 = random_choice_no_return(
                            sites_id_inter_1, [train.size, test.size])
                        sites_lst_train_1 = chosen_lst_1[0]
                        sites_lst_test_1.append(chosen_lst_1[1])
                        sites_lst_train.append(
                            np.sort(
                                np.append(sites_lst_train_1,
                                          sites_lst_train_2)))
                eco_name_chosen.append(eco_name)
        # gather fold i from every ecoregion (lists are laid out round-robin:
        # entry j belongs to fold j % split_num) and cache a data model per fold
        for i in range(split_num):
            sites_ids_train_ilst = [
                sites_lst_train[j] for j in range(len(sites_lst_train))
                if j % split_num == i
            ]
            sites_ids_train_i = np.sort(
                reduce(lambda x, y: np.hstack((x, y)), sites_ids_train_ilst))
            sites_ids_test_ilst_1 = [
                sites_lst_test_1[j] for j in range(len(sites_lst_test_1))
                if j % split_num == i
            ]
            sites_ids_test_i_1 = np.sort(
                reduce(lambda x, y: np.hstack((x, y)), sites_ids_test_ilst_1))
            if plus >= 0:
                sites_ids_test_ilst_2 = [
                    sites_lst_test_2[j] for j in range(len(sites_lst_test_2))
                    if j % split_num == i
                ]
                sites_ids_test_i_2 = np.sort(
                    reduce(lambda x, y: np.hstack((x, y)),
                           sites_ids_test_ilst_2))
            config_data_i = GagesConfig.set_subdir(cfg, str(i))
            gages_model_train_i = GagesModel.update_data_model(
                config_data_i,
                data_model_train,
                sites_id_update=sites_ids_train_i,
                data_attr_update=True,
                screen_basin_area_huc4=False)
            # baseline: evaluate on the training sites in the test period
            gages_model_test_baseline_i = GagesModel.update_data_model(
                config_data_i,
                data_model_test,
                sites_id_update=sites_ids_train_i,
                data_attr_update=True,
                train_stat_dict=gages_model_train_i.stat_dict,
                screen_basin_area_huc4=False)
            gages_model_test_i_1 = GagesModel.update_data_model(
                config_data_i,
                data_model_test,
                sites_id_update=sites_ids_test_i_1,
                data_attr_update=True,
                train_stat_dict=gages_model_train_i.stat_dict,
                screen_basin_area_huc4=False)
            if plus >= 0:
                gages_model_test_i_2 = GagesModel.update_data_model(
                    config_data_i,
                    data_model_test,
                    sites_id_update=sites_ids_test_i_2,
                    data_attr_update=True,
                    train_stat_dict=gages_model_train_i.stat_dict,
                    screen_basin_area_huc4=False)
            save_datamodel(gages_model_train_i,
                           data_source_file_name='data_source.txt',
                           stat_file_name='Statistics.json',
                           flow_file_name='flow',
                           forcing_file_name='forcing',
                           attr_file_name='attr',
                           f_dict_file_name='dictFactorize.json',
                           var_dict_file_name='dictAttribute.json',
                           t_s_dict_file_name='dictTimeSpace.json')
            save_datamodel(gages_model_test_baseline_i,
                           data_source_file_name='test_data_source_base.txt',
                           stat_file_name='test_Statistics_base.json',
                           flow_file_name='test_flow_base',
                           forcing_file_name='test_forcing_base',
                           attr_file_name='test_attr_base',
                           f_dict_file_name='test_dictFactorize_base.json',
                           var_dict_file_name='test_dictAttribute_base.json',
                           t_s_dict_file_name='test_dictTimeSpace_base.json')
            save_datamodel(gages_model_test_i_1,
                           data_source_file_name='test_data_source.txt',
                           stat_file_name='test_Statistics.json',
                           flow_file_name='test_flow',
                           forcing_file_name='test_forcing',
                           attr_file_name='test_attr',
                           f_dict_file_name='test_dictFactorize.json',
                           var_dict_file_name='test_dictAttribute.json',
                           t_s_dict_file_name='test_dictTimeSpace.json')
            if plus >= 0:
                save_datamodel(gages_model_test_i_2,
                               data_source_file_name='test_data_source_2.txt',
                               stat_file_name='test_Statistics_2.json',
                               flow_file_name='test_flow_2',
                               forcing_file_name='test_forcing_2',
                               attr_file_name='test_attr_2',
                               f_dict_file_name='test_dictFactorize_2.json',
                               var_dict_file_name='test_dictAttribute_2.json',
                               t_s_dict_file_name='test_dictTimeSpace_2.json')
            print("save ecoregion " + str(i) + " data model")
    with torch.cuda.device(gpu_num):
        if train_mode:
            for i in range(split_num):
                data_model = GagesModel.load_datamodel(
                    config_data.data_path["Temp"], str(i),
                    data_source_file_name='data_source.txt',
                    stat_file_name='Statistics.json',
                    flow_file_name='flow.npy',
                    forcing_file_name='forcing.npy',
                    attr_file_name='attr.npy',
                    f_dict_file_name='dictFactorize.json',
                    var_dict_file_name='dictAttribute.json',
                    t_s_dict_file_name='dictTimeSpace.json')
                master_train(data_model, random_seed=random_seed)
        for i in range(split_num):
            data_model_baseline = GagesModel.load_datamodel(
                config_data.data_path["Temp"], str(i),
                data_source_file_name='test_data_source_base.txt',
                stat_file_name='test_Statistics_base.json',
                flow_file_name='test_flow_base.npy',
                forcing_file_name='test_forcing_base.npy',
                attr_file_name='test_attr_base.npy',
                f_dict_file_name='test_dictFactorize_base.json',
                var_dict_file_name='test_dictAttribute_base.json',
                t_s_dict_file_name='test_dictTimeSpace_base.json')
            data_model = GagesModel.load_datamodel(
                config_data.data_path["Temp"], str(i),
                data_source_file_name='test_data_source.txt',
                stat_file_name='test_Statistics.json',
                flow_file_name='test_flow.npy',
                forcing_file_name='test_forcing.npy',
                attr_file_name='test_attr.npy',
                f_dict_file_name='test_dictFactorize.json',
                var_dict_file_name='test_dictAttribute.json',
                t_s_dict_file_name='test_dictTimeSpace.json')
            if plus >= 0:
                data_model_2 = GagesModel.load_datamodel(
                    config_data.data_path["Temp"], str(i),
                    data_source_file_name='test_data_source_2.txt',
                    stat_file_name='test_Statistics_2.json',
                    flow_file_name='test_flow_2.npy',
                    forcing_file_name='test_forcing_2.npy',
                    attr_file_name='test_attr_2.npy',
                    f_dict_file_name='test_dictFactorize_2.json',
                    var_dict_file_name='test_dictAttribute_2.json',
                    t_s_dict_file_name='test_dictTimeSpace_2.json')
            pred_baseline, obs_baseline = master_test(data_model_baseline,
                                                      epoch=test_epoch,
                                                      save_file_suffix="base")
            basin_area_baseline = data_model_baseline.data_source.read_attr(
                data_model_baseline.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                is_return_dict=False)
            mean_prep_baseline = data_model_baseline.data_source.read_attr(
                data_model_baseline.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                is_return_dict=False)
            # convert mean precipitation to the units _basin_norm expects
            mean_prep_baseline = mean_prep_baseline / 365 * 10
            pred_baseline = _basin_norm(pred_baseline,
                                        basin_area_baseline,
                                        mean_prep_baseline,
                                        to_norm=False)
            obs_baseline = _basin_norm(obs_baseline,
                                       basin_area_baseline,
                                       mean_prep_baseline,
                                       to_norm=False)
            save_result(
                data_model_baseline.data_source.data_config.data_path['Temp'],
                test_epoch,
                pred_baseline,
                obs_baseline,
                pred_name='flow_pred_base',
                obs_name='flow_obs_base')
            pred, obs = master_test(data_model, epoch=test_epoch)
            basin_area = data_model.data_source.read_attr(
                data_model.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                is_return_dict=False)
            mean_prep = data_model.data_source.read_attr(
                data_model.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                is_return_dict=False)
            mean_prep = mean_prep / 365 * 10
            pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
            obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
            save_result(data_model.data_source.data_config.data_path['Temp'],
                        test_epoch, pred, obs)
            if plus >= 0:
                pred_2, obs_2 = master_test(data_model_2,
                                            epoch=test_epoch,
                                            save_file_suffix="2")
                basin_area_2 = data_model_2.data_source.read_attr(
                    data_model_2.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                    is_return_dict=False)
                mean_prep_2 = data_model_2.data_source.read_attr(
                    data_model_2.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                    is_return_dict=False)
                mean_prep_2 = mean_prep_2 / 365 * 10
                pred_2 = _basin_norm(pred_2,
                                     basin_area_2,
                                     mean_prep_2,
                                     to_norm=False)
                obs_2 = _basin_norm(obs_2,
                                    basin_area_2,
                                    mean_prep_2,
                                    to_norm=False)
                save_result(
                    data_model_2.data_source.data_config.data_path['Temp'],
                    test_epoch,
                    pred_2,
                    obs_2,
                    pred_name='flow_pred_2',
                    obs_name='flow_obs_2')
def test_gages_data_model(self):
    """Restrict the cached CONUS train/test data models to basins that
    contain no dams at all, then save the restricted models for this
    experiment."""
    cfg = self.config_data
    # Screen for gauges whose basins contain zero dams.
    damless_source = GagesSource.choose_some_basins(
        cfg,
        cfg.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=0)
    damless_sites = damless_source.all_configs['flow_screen_gage_id']
    # Pre-generated CONUS-wide quick-data cache.
    cache_dir = os.path.join(cfg.data_path["DB"], "quickdata",
                             "conus-all_85-05_nan-0.1_00-1.0")
    cached_train = GagesModel.load_datamodel(
        cache_dir,
        data_source_file_name='data_source.txt',
        stat_file_name='Statistics.json',
        flow_file_name='flow.npy',
        forcing_file_name='forcing.npy',
        attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json',
        var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    cached_test = GagesModel.load_datamodel(
        cache_dir,
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    model_train = GagesModel.update_data_model(
        cfg,
        cached_train,
        sites_id_update=damless_sites,
        screen_basin_area_huc4=False)
    # The test model is normalized with the training statistics.
    model_test = GagesModel.update_data_model(
        cfg,
        cached_test,
        sites_id_update=damless_sites,
        train_stat_dict=model_train.stat_dict,
        screen_basin_area_huc4=False)
    save_datamodel(model_train,
                   data_source_file_name='data_source.txt',
                   stat_file_name='Statistics.json',
                   flow_file_name='flow',
                   forcing_file_name='forcing',
                   attr_file_name='attr',
                   f_dict_file_name='dictFactorize.json',
                   var_dict_file_name='dictAttribute.json',
                   t_s_dict_file_name='dictTimeSpace.json')
    save_datamodel(model_test,
                   data_source_file_name='test_data_source.txt',
                   stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow',
                   forcing_file_name='test_forcing',
                   attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json',
                   var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    print("read and save data model")
def test_some_reservoirs(self):
    """Save train/test data models for the union of dam-free basins and
    DOR-screened basins."""
    cfg = self.config_data
    train_range = cfg.model_dict["data"]["tRangeTrain"]
    # DOR-screened basins (positive 0.02 cutoff; presumably selects basins
    # at/above the cutoff -- confirm against GagesSource.choose_some_basins).
    dor_source = GagesSource.choose_some_basins(
        cfg,
        train_range,
        screen_basin_area_huc4=False,
        DOR=0.02)
    # Basins that contain no dams at all.
    damless_source = GagesSource.choose_some_basins(
        cfg,
        train_range,
        screen_basin_area_huc4=False,
        dam_num=0)
    dor_sites = dor_source.all_configs['flow_screen_gage_id']
    damless_sites = damless_source.all_configs['flow_screen_gage_id']
    # The chosen site set is the sorted union of the two screenings.
    sites_id = np.sort(
        np.union1d(np.array(dor_sites), np.array(damless_sites))).tolist()
    # Pre-generated CONUS-wide quick-data cache.
    cache_dir = os.path.join(cfg.data_path["DB"], "quickdata",
                             "conus-all_90-10_nan-0.0_00-1.0")
    cached_train = GagesModel.load_datamodel(
        cache_dir,
        data_source_file_name='data_source.txt',
        stat_file_name='Statistics.json',
        flow_file_name='flow.npy',
        forcing_file_name='forcing.npy',
        attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json',
        var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    cached_test = GagesModel.load_datamodel(
        cache_dir,
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    model_train = GagesModel.update_data_model(
        cfg,
        cached_train,
        sites_id_update=sites_id,
        data_attr_update=True,
        screen_basin_area_huc4=False)
    # The test model is normalized with the training statistics.
    model_test = GagesModel.update_data_model(
        cfg,
        cached_test,
        sites_id_update=sites_id,
        data_attr_update=True,
        train_stat_dict=model_train.stat_dict,
        screen_basin_area_huc4=False)
    save_datamodel(model_train,
                   data_source_file_name='data_source.txt',
                   stat_file_name='Statistics.json',
                   flow_file_name='flow',
                   forcing_file_name='forcing',
                   attr_file_name='attr',
                   f_dict_file_name='dictFactorize.json',
                   var_dict_file_name='dictAttribute.json',
                   t_s_dict_file_name='dictTimeSpace.json')
    save_datamodel(model_test,
                   data_source_file_name='test_data_source.txt',
                   stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow',
                   forcing_file_name='test_forcing',
                   attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json',
                   var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    print("read and save data model")
def test_some_reservoirs(self):
    """choose some small reservoirs to train and test

    Screens basins whose degree of regulation (dor) is below 0.02, then
    rebuilds train/test data models from the quick-data cache for just
    those sites and saves them to the experiment's directory.
    """
    # read the model configuration
    config_data = self.config_data
    # according to paper "High-resolution mapping of the world's reservoirs and dams for sustainable river-flow management"
    dor = -0.02  # meaning dor < 0.02; negative value is this project's
    # convention for "below the cutoff" screening
    source_data = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor)
    # gauge ids that passed the screening
    sites_id = source_data.all_configs['flow_screen_gage_id']
    quick_data_dir = os.path.join(self.config_data.data_path["DB"],
                                  "quickdata")
    data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0")
    # cached CONUS-wide data model for the training period
    data_model_train = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='data_source.txt',
        stat_file_name='Statistics.json',
        flow_file_name='flow.npy',
        forcing_file_name='forcing.npy',
        attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json',
        var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    # cached CONUS-wide data model for the test period
    data_model_test = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    # restrict both cached models to the screened sites and refresh attributes
    gages_model_train = GagesModel.update_data_model(
        self.config_data,
        data_model_train,
        sites_id_update=sites_id,
        data_attr_update=True,
        screen_basin_area_huc4=False)
    # the test model is normalized with the training statistics
    gages_model_test = GagesModel.update_data_model(
        self.config_data,
        data_model_test,
        sites_id_update=sites_id,
        data_attr_update=True,
        train_stat_dict=gages_model_train.stat_dict,
        screen_basin_area_huc4=False)
    save_datamodel(gages_model_train,
                   data_source_file_name='data_source.txt',
                   stat_file_name='Statistics.json',
                   flow_file_name='flow',
                   forcing_file_name='forcing',
                   attr_file_name='attr',
                   f_dict_file_name='dictFactorize.json',
                   var_dict_file_name='dictAttribute.json',
                   t_s_dict_file_name='dictTimeSpace.json')
    save_datamodel(gages_model_test,
                   data_source_file_name='test_data_source.txt',
                   stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow',
                   forcing_file_name='test_forcing',
                   attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json',
                   var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    print("read and save data model")
train_stat_dict=gages_model_train.stat_dict, screen_basin_area_huc4=False) inds_df, pred_mean, obs_mean = load_ensemble_result(cfg, exp_lst, test_epoch, return_value=True) plt.rcParams['font.family'] = 'serif' plt.rcParams['font.serif'] = ['Times New Roman' ] + plt.rcParams['font.serif'] ########################### plot diversion dor ecdf ########################### diversion_yes = True diversion_no = False source_data_diversion = GagesSource.choose_some_basins( config_data, config_data.model_dict["data"]["tRangeTrain"], screen_basin_area_huc4=False, diversion=diversion_yes) source_data_nodivert = GagesSource.choose_some_basins( config_data, config_data.model_dict["data"]["tRangeTrain"], screen_basin_area_huc4=False, diversion=diversion_no) sites_id_nodivert = source_data_nodivert.all_configs['flow_screen_gage_id'] sites_id_diversion = source_data_diversion.all_configs[ 'flow_screen_gage_id'] dor_1 = -dor_cutoff dor_2 = dor_cutoff source_data_dor1 = GagesSource.choose_some_basins( config_data,
def test_some_reservoirs(self):
    """Save train/test data models for a control group of basins.

    The chosen site set is the union of (a) basins with no major dam and
    (b) low-dor (dor < 0.02) basins that do have major dams.
    """
    # a control group for simulate/exp3
    dor = -0.02  # meaning dor < 0.02
    source_data = GagesSource.choose_some_basins(
        self.config_data,
        self.config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor)
    # gauge ids of the low-dor screening
    sites_id_dor = source_data.all_configs['flow_screen_gage_id']
    quick_data_dir = os.path.join(self.config_data.data_path["DB"],
                                  "quickdata")
    data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0")
    # cached data model used for training ("9000" presumably encodes the
    # 1990-2000 period per the cache dir name -- TODO confirm)
    data_model_9000 = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='data_source.txt',
        stat_file_name='Statistics.json',
        flow_file_name='flow.npy',
        forcing_file_name='forcing.npy',
        attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json',
        var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    # cached data model used for testing ("0010": 2000-2010, presumably)
    data_model_0010 = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    conus_sites_id_all = data_model_9000.t_s_dict["sites_id"]
    # basins with no major dam
    nomajordam_source_data = GagesSource.choose_some_basins(
        self.config_data,
        self.config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        major_dam_num=0)
    nomajordam_sites_id = nomajordam_source_data.all_configs[
        'flow_screen_gage_id']
    # In no major dam case, all sites are chosen as natural flow generator
    nomajordam_in_conus = np.intersect1d(conus_sites_id_all,
                                         nomajordam_sites_id)
    # low-dor sites restricted to the cached CONUS site set
    conus_sites_id_dor = np.intersect1d(conus_sites_id_all, sites_id_dor)
    # basins with at least one major dam (upper bound 2000 is effectively
    # "any number" here)
    majordam_source_data = GagesSource.choose_some_basins(
        self.config_data,
        self.config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        major_dam_num=[1, 2000])
    majordam_sites_id = majordam_source_data.all_configs[
        'flow_screen_gage_id']
    # major-dam sites that are also low-dor (note: intersected with the
    # dor-screened subset, not with the full CONUS set)
    majordam_in_conus = np.intersect1d(conus_sites_id_dor, majordam_sites_id)
    # final control-group site set, sorted
    chosen_sites_id = np.sort(
        np.append(nomajordam_in_conus, majordam_in_conus))
    gages_model_train_lstm = GagesModel.update_data_model(
        self.config_data,
        data_model_9000,
        sites_id_update=chosen_sites_id,
        data_attr_update=True,
        screen_basin_area_huc4=False)
    # the test model is normalized with the training statistics
    gages_model_test_lstm = GagesModel.update_data_model(
        self.config_data,
        data_model_0010,
        sites_id_update=chosen_sites_id,
        data_attr_update=True,
        train_stat_dict=gages_model_train_lstm.stat_dict,
        screen_basin_area_huc4=False)
    save_datamodel(gages_model_train_lstm,
                   data_source_file_name='data_source.txt',
                   stat_file_name='Statistics.json',
                   flow_file_name='flow',
                   forcing_file_name='forcing',
                   attr_file_name='attr',
                   f_dict_file_name='dictFactorize.json',
                   var_dict_file_name='dictAttribute.json',
                   t_s_dict_file_name='dictTimeSpace.json')
    save_datamodel(gages_model_test_lstm,
                   data_source_file_name='test_data_source.txt',
                   stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow',
                   forcing_file_name='test_forcing',
                   attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json',
                   var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    print("read and save data model")
def test_stor_seperate(self):
    """Plot NSE ECDFs for small- vs large-storage basins.

    Splits the exp18 test basins by normalized reservoir storage
    ([0, 50] vs [50, 15000]) and plots their NSE ECDFs "together"
    (one model, two site groups) next to the ECDFs of two separately
    trained experiments (dam_exp12 / dam_exp11).
    """
    config_dir = definitions.CONFIG_DIR
    config_file = os.path.join(config_dir, "basic/config_exp18.ini")
    subdir = r"basic/exp18"
    config_data = GagesConfig.set_subdir(config_file, subdir)
    data_model = GagesModel.load_datamodel(
        config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    all_sites = data_model.t_s_dict["sites_id"]
    storage_nor_1 = [0, 50]
    storage_nor_2 = [50, 15000]  # max is 14348.6581036888
    source_data_nor1 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        STORAGE=storage_nor_1)
    source_data_nor2 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        STORAGE=storage_nor_2)
    sites_id_nor1 = source_data_nor1.all_configs['flow_screen_gage_id']
    sites_id_nor2 = source_data_nor2.all_configs['flow_screen_gage_id']
    # positions of each storage group inside the full test-site list
    idx_lst_nor1 = [
        i for i in range(len(all_sites)) if all_sites[i] in sites_id_nor1
    ]
    idx_lst_nor2 = [
        i for i in range(len(all_sites)) if all_sites[i] in sites_id_nor2
    ]
    pred, obs = load_result(
        data_model.data_source.data_config.data_path['Temp'],
        self.test_epoch)
    pred = pred.reshape(pred.shape[0], pred.shape[1])
    # Fix: flatten obs with its OWN shape (was pred.shape). Identical when
    # the arrays match, and raises instead of silently mis-aligning
    # observations when they don't.
    obs = obs.reshape(obs.shape[0], obs.shape[1])
    inds = statError(obs, pred)
    inds_df = pd.DataFrame(inds)
    keys_nse = "NSE"
    xs = []
    ys = []
    # ECDFs of the two storage groups under the single exp18 model
    cases_exps_legends_together = ["small_stor", "large_stor"]
    x1, y1 = ecdf(inds_df[keys_nse].iloc[idx_lst_nor1])
    xs.append(x1)
    ys.append(y1)
    x2, y2 = ecdf(inds_df[keys_nse].iloc[idx_lst_nor2])
    xs.append(x2)
    ys.append(y2)
    # ECDFs of the two separately trained storage-group experiments
    cases_exps = ["dam_exp12", "dam_exp11"]
    cases_exps_legends_separate = ["small_stor", "large_stor"]
    for case_exp in cases_exps:
        config_data_i = load_dataconfig_case_exp(case_exp)
        pred_i, obs_i = load_result(config_data_i.data_path['Temp'],
                                    self.test_epoch)
        pred_i = pred_i.reshape(pred_i.shape[0], pred_i.shape[1])
        obs_i = obs_i.reshape(obs_i.shape[0], obs_i.shape[1])
        inds_i = statError(obs_i, pred_i)
        x, y = ecdf(inds_i[keys_nse])
        xs.append(x)
        ys.append(y)
    plot_ecdfs(xs,
               ys,
               cases_exps_legends_together + cases_exps_legends_separate,
               style=["together", "together", "separate", "separate"])
def test_zero_small_dor_basins_locations(self):
    """Save site lists and plot map/scatter figures for western zero-dor
    and small-dor basins whose ensemble NSE lies in [0, 1).
    """
    conus_exps = self.exp_lst
    test_epoch = self.test_epoch
    # ensemble metrics plus predictions/observations for the CONUS run
    inds_df, pred, obs = load_ensemble_result(self.config_file,
                                              conus_exps,
                                              test_epoch,
                                              return_value=True)
    conus_config_data = load_dataconfig_case_exp(self.config_file,
                                                 conus_exps[0])
    conus_data_model = GagesModel.load_datamodel(
        conus_config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    conus_sites = conus_data_model.t_s_dict["sites_id"]
    all_lat = conus_data_model.data_source.gage_dict["LAT_GAGE"]
    all_lon = conus_data_model.data_source.gage_dict["LNG_GAGE"]
    show_ind_key = "NSE"
    attr_lst = ["SLOPE_PCT", "ELEV_MEAN_M_BASIN"]
    # per-site slope and elevation attributes, column order follows attr_lst
    attrs = conus_data_model.data_source.read_attr(conus_sites,
                                                   attr_lst,
                                                   is_return_dict=False)
    # "western" basins: longitude west of -100 degrees
    western_lon_idx = [i for i in range(all_lon.size) if all_lon[i] < -100]
    nse_range = [0, 1]
    # positional indices of sites whose NSE falls in [0, 1)
    idx_lst_nse = inds_df[(inds_df[show_ind_key] >= nse_range[0]) & (
        inds_df[show_ind_key] < nse_range[1])].index.tolist()
    idx_lst_nse = np.intersect1d(western_lon_idx, idx_lst_nse)
    # small dor
    source_data_dor1 = GagesSource.choose_some_basins(
        conus_config_data,
        conus_config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=-self.dor)
    # basins with dams
    source_data_withdams = GagesSource.choose_some_basins(
        conus_config_data,
        conus_config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=[1, 10000])
    # basins without dams
    source_data_withoutdams = GagesSource.choose_some_basins(
        conus_config_data,
        conus_config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=0)
    sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
    sites_id_withdams = source_data_withdams.all_configs[
        'flow_screen_gage_id']
    sites_id_nodam = source_data_withoutdams.all_configs[
        'flow_screen_gage_id']
    # small-dor basins must actually contain at least one dam
    sites_id_smalldam = np.intersect1d(
        np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()
    idx_lst_nodam_in_conus = [
        i for i in range(len(conus_sites))
        if conus_sites[i] in sites_id_nodam
    ]
    idx_lst_smalldam_in_conus = [
        i for i in range(len(conus_sites))
        if conus_sites[i] in sites_id_smalldam
    ]
    # type 1: western zero-dor sites with NSE in range; type 2: small-dor
    type_1_index_lst = np.intersect1d(idx_lst_nodam_in_conus,
                                      idx_lst_nse).tolist()
    type_2_index_lst = np.intersect1d(idx_lst_smalldam_in_conus,
                                      idx_lst_nse).tolist()
    pd.DataFrame({
        "GAGE_ID": np.array(conus_sites)[type_1_index_lst]
    }).to_csv(
        os.path.join(conus_config_data.data_path["Out"],
                     "western-zero-dor-sites.csv"))
    pd.DataFrame({
        "GAGE_ID": np.array(conus_sites)[type_2_index_lst]
    }).to_csv(
        os.path.join(conus_config_data.data_path["Out"],
                     "western-small-dor-sites.csv"))
    # long-format frame: one row per site, tagged by group
    frame = []
    df_type1 = pd.DataFrame({
        "type": np.full(len(type_1_index_lst), "zero-dor"),
        show_ind_key: inds_df[show_ind_key].values[type_1_index_lst],
        "lat": all_lat[type_1_index_lst],
        "lon": all_lon[type_1_index_lst],
        "slope": attrs[type_1_index_lst, 0],
        "elevation": attrs[type_1_index_lst, 1]
    })
    frame.append(df_type1)
    df_type2 = pd.DataFrame({
        "type": np.full(len(type_2_index_lst), "small-dor"),
        show_ind_key: inds_df[show_ind_key].values[type_2_index_lst],
        "lat": all_lat[type_2_index_lst],
        "lon": all_lon[type_2_index_lst],
        "slope": attrs[type_2_index_lst, 0],
        "elevation": attrs[type_2_index_lst, 1]
    })
    frame.append(df_type2)
    data_df = pd.concat(frame)
    # row ranges of the two groups inside the concatenated frame
    idx_lst = [
        np.arange(len(type_1_index_lst)),
        np.arange(len(type_1_index_lst),
                  len(type_1_index_lst) + len(type_2_index_lst))
    ]
    plot_gages_map_and_scatter(data_df,
                               [show_ind_key, "lat", "lon", "slope"],
                               idx_lst,
                               cmap_strs=["Reds", "Blues"],
                               labels=["zero-dor", "small-dor"],
                               scatter_label=[attr_lst[0], show_ind_key],
                               wspace=2,
                               hspace=1.5,
                               legend_y=.8,
                               sub_fig_ratio=[6, 4, 1])
    plt.tight_layout()
    plt.show()
def test_diff_dor(self):
    """Three-panel boxplot comparison of NSE for zero-dor, small-dor and
    large-dor basins.

    Each panel shows one basin group evaluated by: the model trained on
    that group alone, the two pairwise-combined models containing the
    group, and the CONUS-wide model.

    Refactor notes: the three nearly identical panel sections are now one
    pair of local helpers; unused locals of the original (the index lists
    never plotted, the unused ``fig`` binding) and dead commented-out code
    were removed. Plotting behavior is unchanged.
    """
    dor_1 = -self.dor
    dor_2 = self.dor
    test_epoch = self.test_epoch
    config_file = self.config_file
    conus_exps = ["basic_exp37"]
    pair1_exps = ["dam_exp1"]
    pair2_exps = ["nodam_exp7"]
    pair3_exps = ["dam_exp27"]
    nodam_exp_lst = ["nodam_exp1"]
    smalldam_exp_lst = [
        "dam_exp17"
    ]  # -0.003["dam_exp11"] -0.08["dam_exp17"] -1["dam_exp32"]
    largedam_exp_lst = [
        "dam_exp4"
    ]  # 0.003["dam_exp12"] 0.08["dam_exp18"] 1["dam_exp33"]
    pair1_config_data = load_dataconfig_case_exp(config_file, pair1_exps[0])
    pair2_config_data = load_dataconfig_case_exp(config_file, pair2_exps[0])
    pair3_config_data = load_dataconfig_case_exp(config_file, pair3_exps[0])
    conus_config_data = load_dataconfig_case_exp(config_file, conus_exps[0])

    def _test_sites(config_data):
        # Load one experiment's test-period data model; return its site ids.
        data_model = GagesModel.load_datamodel(
            config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        return data_model.t_s_dict["sites_id"]

    def _screened_sites(**screen_kwargs):
        # Screen basins on the training period; return chosen gauge ids.
        source = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            **screen_kwargs)
        return source.all_configs['flow_screen_gage_id']

    conus_sites = _test_sites(conus_config_data)
    sites_id_dor1 = _screened_sites(DOR=dor_1)  # dor below cutoff
    sites_id_largedam = _screened_sites(DOR=dor_2)  # dor above cutoff
    sites_id_withdams = _screened_sites(dam_num=[1, 10000])
    sites_id_nodam = _screened_sites(dam_num=0)
    # small-dor basins must actually contain at least one dam
    sites_id_smalldam = np.intersect1d(
        np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()
    pair1_sites = _test_sites(pair1_config_data)
    pair2_sites = _test_sites(pair2_config_data)
    pair3_sites = _test_sites(pair3_config_data)

    def _indices_of(sites, chosen):
        # Positions in `sites` whose id belongs to the `chosen` set.
        return [i for i in range(len(sites)) if sites[i] in chosen]

    idx_lst_nodam_in_pair1 = _indices_of(pair1_sites, sites_id_nodam)
    idx_lst_nodam_in_pair2 = _indices_of(pair2_sites, sites_id_nodam)
    idx_lst_nodam_in_conus = _indices_of(conus_sites, sites_id_nodam)
    idx_lst_smalldam_in_pair1 = _indices_of(pair1_sites, sites_id_smalldam)
    idx_lst_smalldam_in_pair3 = _indices_of(pair3_sites, sites_id_smalldam)
    idx_lst_smalldam_in_conus = _indices_of(conus_sites, sites_id_smalldam)
    idx_lst_largedam_in_pair2 = _indices_of(pair2_sites, sites_id_largedam)
    idx_lst_largedam_in_pair3 = _indices_of(pair3_sites, sites_id_largedam)
    idx_lst_largedam_in_conus = _indices_of(conus_sites, sites_id_largedam)
    print("multi box")
    inds_df_pair1 = load_ensemble_result(config_file, pair1_exps, test_epoch)
    inds_df_pair2 = load_ensemble_result(config_file, pair2_exps, test_epoch)
    inds_df_pair3 = load_ensemble_result(config_file, pair3_exps, test_epoch)
    inds_df_conus = load_ensemble_result(config_file, conus_exps, test_epoch)
    plt.figure(figsize=(15, 8))
    gs = gridspec.GridSpec(1, 3)
    keys_nse = "NSE"
    color_chosen = ["Greens", "Blues", "Reds"]
    median_loc = 0.015
    decimal_places = 2
    sns.despine()
    sns.set(font_scale=1.5)

    def _group_frame(attr_name, legends, alone_df, subsets):
        # Stack the alone-model NSEs and each subset's NSEs into one
        # long-format frame labeled by the legend names.
        frames = [
            pd.DataFrame({
                attr_name:
                np.full([alone_df.shape[0]], legends[0]),
                keys_nse:
                alone_df[keys_nse]
            })
        ]
        for legend, (inds_df, idx_lst) in zip(legends[1:], subsets):
            values = inds_df[keys_nse].iloc[idx_lst]
            frames.append(
                pd.DataFrame({
                    attr_name: np.full([values.shape[0]], legend),
                    keys_nse: values
                }))
        return pd.concat(frames)

    def _draw_panel(ax, attr_name, result, palette, hide_ylabel):
        # One boxplot panel with group medians annotated above each box.
        ax.set_xticklabels(ax.get_xticklabels(), rotation=30)
        ax.set_ylim([0, 1])
        if hide_ylabel:
            ax.set(ylabel=None)
        sns.boxplot(ax=ax,
                    x=attr_name,
                    y=keys_nse,
                    data=result,
                    showfliers=False,
                    palette=palette)
        medians = result.groupby([attr_name],
                                 sort=False)[keys_nse].median().values
        median_labels = [str(np.round(s, decimal_places)) for s in medians]
        for tick in range(len(medians)):
            ax.text(tick,
                    medians[tick] + median_loc,
                    median_labels[tick],
                    horizontalalignment='center',
                    size='x-small',
                    weight='semibold')

    # panel (a): zero-dor basins
    inds_df_nodam = load_ensemble_result(config_file, nodam_exp_lst,
                                         test_epoch)
    result_nodam = _group_frame(
        "zero_dor", ["LSTM-Z", "LSTM-ZS", "LSTM-ZL", "LSTM-CONUS"],
        inds_df_nodam, [(inds_df_pair1, idx_lst_nodam_in_pair1),
                        (inds_df_pair2, idx_lst_nodam_in_pair2),
                        (inds_df_conus, idx_lst_nodam_in_conus)])
    _draw_panel(plt.subplot(gs[0]), "zero_dor", result_nodam,
                color_chosen[0], False)
    # panel (b): small-dor basins
    inds_df_smalldam = load_ensemble_result(config_file, smalldam_exp_lst,
                                            test_epoch)
    result_smalldam = _group_frame(
        "small_dor", ["LSTM-S", "LSTM-ZS", "LSTM-SL", "LSTM-CONUS"],
        inds_df_smalldam, [(inds_df_pair1, idx_lst_smalldam_in_pair1),
                           (inds_df_pair3, idx_lst_smalldam_in_pair3),
                           (inds_df_conus, idx_lst_smalldam_in_conus)])
    _draw_panel(plt.subplot(gs[1]), "small_dor", result_smalldam,
                color_chosen[1], True)
    # panel (c): large-dor basins
    inds_df_largedam = load_ensemble_result(config_file, largedam_exp_lst,
                                            test_epoch)
    result_largedam = _group_frame(
        "large_dor", ["LSTM-L", "LSTM-ZL", "LSTM-SL", "LSTM-CONUS"],
        inds_df_largedam, [(inds_df_pair2, idx_lst_largedam_in_pair2),
                           (inds_df_pair3, idx_lst_largedam_in_pair3),
                           (inds_df_conus, idx_lst_largedam_in_conus)])
    _draw_panel(plt.subplot(gs[2]), "large_dor", result_largedam,
                color_chosen[2], True)
    plt.tight_layout()
    plt.show()
def test_diff_dor_fig2_in_the_paper(self):
    """Reproduce Fig. 2 of the paper: ECDFs of test-period NSE.

    Basins are split into four groups -- not-diverted vs. diverted crossed
    with small vs. large degree of regulation (dor) -- and the ECDF of the
    ensemble NSE of each group is plotted together with the full CONUS
    curve; the figure is saved to the output directory.
    """
    # cached CONUS data model; provides the site list used for indexing below
    data_model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='data_source.txt',
        stat_file_name='Statistics.json',
        flow_file_name='flow.npy',
        forcing_file_name='forcing.npy',
        attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json',
        var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    config_data = self.config_data
    config_file = self.config_file
    test_epoch = self.test_epoch
    exp_lst = self.exp_lst
    figure_dpi = self.FIGURE_DPI
    # ensemble metrics over the experiments (pred_mean/obs_mean unused below)
    inds_df, pred_mean, obs_mean = load_ensemble_result(config_file,
                                                        exp_lst,
                                                        test_epoch,
                                                        return_value=True)
    diversion_yes = True
    diversion_no = False
    source_data_diversion = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        diversion=diversion_yes)
    source_data_nodivert = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        diversion=diversion_no)
    sites_id_nodivert = source_data_nodivert.all_configs[
        'flow_screen_gage_id']
    sites_id_diversion = source_data_diversion.all_configs[
        'flow_screen_gage_id']
    # NOTE(review): sign convention -- presumably a negative DOR selects
    # basins below the threshold and a positive one above it; confirm
    # against GagesSource.choose_some_basins
    dor_1 = -self.dor
    dor_2 = self.dor
    source_data_dor1 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor_1)
    source_data_dor2 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor_2)
    sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id']
    sites_id_dor2 = source_data_dor2.all_configs['flow_screen_gage_id']
    # basins with dams: the small-dor group is further restricted to basins
    # that actually have at least one dam
    source_data_withdams = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        dam_num=[1, 100000])
    sites_id_withdams = source_data_withdams.all_configs[
        'flow_screen_gage_id']
    sites_id_dor1 = np.intersect1d(np.array(sites_id_dor1),
                                   np.array(sites_id_withdams)).tolist()
    # the four diversion x dor groups
    no_divert_small_dor = np.intersect1d(sites_id_nodivert, sites_id_dor1)
    no_divert_large_dor = np.intersect1d(sites_id_nodivert, sites_id_dor2)
    diversion_small_dor = np.intersect1d(sites_id_diversion, sites_id_dor1)
    diversion_large_dor = np.intersect1d(sites_id_diversion, sites_id_dor2)
    all_sites = data_model.t_s_dict["sites_id"]
    # positions of each group's sites within the CONUS site list, so rows of
    # inds_df can be selected with iloc
    idx_lst_nodivert_smalldor = [
        i for i in range(len(all_sites))
        if all_sites[i] in no_divert_small_dor
    ]
    idx_lst_nodivert_largedor = [
        i for i in range(len(all_sites))
        if all_sites[i] in no_divert_large_dor
    ]
    idx_lst_diversion_smalldor = [
        i for i in range(len(all_sites))
        if all_sites[i] in diversion_small_dor
    ]
    idx_lst_diversion_largedor = [
        i for i in range(len(all_sites))
        if all_sites[i] in diversion_large_dor
    ]
    keys_nse = "NSE"
    xs = []
    ys = []
    cases_exps_legends_together = [
        "not_diverted_small_dor", "not_diverted_large_dor",
        "diversion_small_dor", "diversion_large_dor", "CONUS"
    ]
    # ECDF of NSE for each group: x holds sorted NSE values, y the
    # cumulative frequencies
    x1, y1 = ecdf(inds_df[keys_nse].iloc[idx_lst_nodivert_smalldor])
    xs.append(x1)
    ys.append(y1)
    x2, y2 = ecdf(inds_df[keys_nse].iloc[idx_lst_nodivert_largedor])
    xs.append(x2)
    ys.append(y2)
    x3, y3 = ecdf(inds_df[keys_nse].iloc[idx_lst_diversion_smalldor])
    xs.append(x3)
    ys.append(y3)
    x4, y4 = ecdf(inds_df[keys_nse].iloc[idx_lst_diversion_largedor])
    xs.append(x4)
    ys.append(y4)
    # full CONUS reference curve
    x_conus, y_conus = ecdf(inds_df[keys_nse])
    xs.append(x_conus)
    ys.append(y_conus)
    hydro_logger.info(
        "The median NSEs of all five curves (%s) are \n %.2f, %.2f, %.2f, %.2f, %.2f",
        cases_exps_legends_together, np.median(x1), np.median(x2),
        np.median(x3), np.median(x4), np.median(x_conus))
    # plot_ecdfs_matplot(xs, ys, cases_exps_legends_together,
    #                    colors=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "grey"],
    #                    dash_lines=[False, False, False, False, True], x_str="NSE", y_str="CDF")
    # plot using two linestyles and two colors for dor and diversion.
    # plot_ecdfs(xs, ys, cases_exps_legends_together, x_str="NSE", y_str="CDF")
    # define color scheme and line style: color encodes dor size,
    # linestyle encodes diversion status
    colors = ["#1f77b4", "#d62728"]
    linestyles = ['-', "--"]
    markers = ["", "."]
    fig = plt.figure(figsize=(8, 6))
    axes = fig.add_axes([0.1, 0.1, 0.8, 0.8])
    # for i, marker in enumerate(markers):
    for i, linestyle in enumerate(linestyles):
        for j, color in enumerate(colors):
            # groups are ordered so that i indexes diversion status and
            # j indexes dor size
            plt.plot(
                xs[i * 2 + j],
                ys[i * 2 + j],
                color=color,
                ls=linestyle,
                # marker=marker,
                label=cases_exps_legends_together[i * 2 + j])
    # CONUS reference curve in grey with a custom dash pattern
    line_i, = axes.plot(x_conus,
                        y_conus,
                        color="grey",
                        label=cases_exps_legends_together[4])
    line_i.set_dashes([2, 2, 10, 2])
    x_str = "NSE"
    y_str = "CDF"
    x_lim = (0, 1)
    y_lim = (0, 1)
    x_interval = 0.1
    y_interval = 0.1
    plt.xlabel(x_str, fontsize=18)
    plt.ylabel(y_str, fontsize=18)
    axes.set_xlim(x_lim[0], x_lim[1])
    axes.set_ylim(y_lim[0], y_lim[1])
    # set x y number font size
    plt.xticks(np.arange(x_lim[0], x_lim[1] + x_lim[1] / 100, x_interval),
               fontsize=16)
    plt.yticks(np.arange(y_lim[0], y_lim[1] + y_lim[1] / 100, y_interval),
               fontsize=16)
    plt.grid()
    # Hide the right and top spines
    axes.spines['right'].set_visible(False)
    axes.spines['top'].set_visible(False)
    axes.legend()
    plt.legend(prop={'size': 16})
    plt.savefig(os.path.join(config_data.data_path["Out"],
                             'new_dor_divert_comp_matplotlib.png'),
                dpi=figure_dpi,
                bbox_inches="tight")
    plt.show()
def update_data_model(cls,
                      config_data,
                      data_model_origin,
                      sites_id_update=None,
                      t_range_update=None,
                      data_attr_update=False,
                      train_stat_dict=None,
                      screen_basin_area_huc4=False):
    """Build a new data model from an existing one, optionally restricted
    to a subset of basins and/or a sub time range.

    :param config_data: model configuration used to rebuild the data source
    :param data_model_origin: source data model whose arrays are sliced/copied
    :param sites_id_update: if given, keep only basins in the intersection of
        this list with the origin's basins (both lists must be sorted)
    :param t_range_update: if given, slice the daily flow/forcing arrays to
        this time range (must start on/after the origin's start date)
    :param data_attr_update: if True, re-read the attributes for the kept
        basins instead of slicing the origin's attribute array
    :param train_stat_dict: statistics used for normalization; if None they
        are recomputed from the new model (pass the training statistics when
        building a test-period model)
    :param screen_basin_area_huc4: forwarded to GagesSource.choose_some_basins
    :return: the new data model instance
    """
    t_s_dict_origin = data_model_origin.t_s_dict
    data_flow_origin = data_model_origin.data_flow
    data_forcing_origin = data_model_origin.data_forcing
    data_attr_origin = data_model_origin.data_attr
    var_dict_origin = data_model_origin.var_dict
    f_dict_origin = data_model_origin.f_dict
    stat_dict_origin = data_model_origin.stat_dict
    if sites_id_update is not None:
        t_s_dict = {}
        t_range_origin_cpy = t_s_dict_origin["t_final_range"].copy()
        sites_id_origin_cpy = t_s_dict_origin["sites_id"].copy()
        sites_id_new = sites_id_update
        # both id lists must be strictly ascending so that np.intersect1d
        # preserves the row order of the origin arrays
        assert (all(
            x < y
            for x, y in zip(sites_id_origin_cpy, sites_id_origin_cpy[1:])))
        assert (all(x < y for x, y in zip(sites_id_new, sites_id_new[1:])))
        sites_id = np.intersect1d(sites_id_origin_cpy, sites_id_new)
        assert sites_id.size > 0
        new_source_data = GagesSource.choose_some_basins(
            config_data,
            t_range_origin_cpy,
            screen_basin_area_huc4=screen_basin_area_huc4,
            sites_id=sites_id.tolist())
        t_s_dict["t_final_range"] = t_range_origin_cpy
        t_s_dict["sites_id"] = sites_id.tolist()
        # row positions of the kept basins in the origin arrays
        chosen_idx = [
            i for i in range(len(sites_id_origin_cpy))
            if sites_id_origin_cpy[i] in sites_id
        ]
        data_flow = data_flow_origin[chosen_idx, :]
        data_forcing = data_forcing_origin[chosen_idx, :, :]
        data_attr = data_attr_origin[chosen_idx, :]
    else:
        # no basin subsetting: copy everything from the origin
        t_range_origin_cpy = t_s_dict_origin["t_final_range"].copy()
        t_s_dict = copy.deepcopy(t_s_dict_origin)
        new_source_data = GagesSource.choose_some_basins(
            config_data,
            t_range_origin_cpy,
            screen_basin_area_huc4=screen_basin_area_huc4)
        data_flow = data_flow_origin.copy()
        data_forcing = data_forcing_origin.copy()
        data_attr = data_attr_origin.copy()
    if data_attr_update:
        # re-read attributes for the (possibly reduced) basin set
        attr_lst = new_source_data.all_configs.get("attr_chosen")
        data_attr, var_dict, f_dict = new_source_data.read_attr(
            t_s_dict["sites_id"], attr_lst)
    else:
        var_dict = var_dict_origin.copy()
        f_dict = f_dict_origin.copy()
    data_model = cls(new_source_data, data_flow, data_forcing, data_attr,
                     var_dict, f_dict, stat_dict_origin, t_s_dict)
    if t_range_update is not None:
        sites_id_temp = data_model.t_s_dict['sites_id'].copy()
        t_range = t_range_update.copy()
        stat_dict_temp = {}
        t_s_dict_temp = {}
        # day offset of the new start date within the origin time range
        start_index = int(
            (np.datetime64(t_range[0]) -
             np.datetime64(data_model.t_s_dict["t_final_range"][0])) /
            np.timedelta64(1, 'D'))
        assert start_index >= 0
        t_lst_temp = hydro_time.t_range_days(t_range)
        end_index = start_index + t_lst_temp.size
        # slice the time axis of flow and forcing (attributes are static)
        data_flow = data_model.data_flow[:, start_index:end_index]
        data_forcing = data_model.data_forcing[:, start_index:end_index, :]
        data_model = cls(new_source_data, data_flow, data_forcing, data_attr,
                         var_dict, f_dict, stat_dict_temp, t_s_dict_temp)
        t_s_dict_temp['sites_id'] = sites_id_temp
        t_s_dict_temp['t_final_range'] = t_range
        data_model.t_s_dict = t_s_dict_temp
        data_model.data_source.t_range = t_range
    # keep the data source's gage metadata consistent with the kept basins
    if not data_model.data_source.gage_dict["STAID"].tolist(
    ) == data_model.t_s_dict['sites_id']:
        gage_dict_new = dict()
        usgs_all_sites = data_model.data_source.gage_dict["STAID"]
        sites_chosen = np.zeros(usgs_all_sites.shape[0])
        usgs_ids = data_model.t_s_dict['sites_id']
        sites_index = np.where(np.in1d(usgs_all_sites, usgs_ids))[0]
        sites_chosen[sites_index] = 1
        for key, value in data_model.data_source.gage_dict.items():
            value_new = np.array([
                value[i] for i in range(len(sites_chosen))
                if sites_chosen[i] > 0
            ])
            gage_dict_new[key] = value_new
        data_model.data_source.gage_dict = gage_dict_new
        assert (np.array(usgs_ids) == gage_dict_new["STAID"]).all()
    if train_stat_dict is None:
        stat_dict_temp = data_model.cal_stat_all()
    else:
        stat_dict_temp = train_stat_dict
    data_model.stat_dict = stat_dict_temp
    return data_model
def synergy_ecoregion(args):
    """Train and test one model per level-II (ECO2) ecoregion.

    For every ECO2 code, the cached CONUS train/test data models are subset
    to the basins inside that ecoregion; the subset models are optionally
    cached, trained/tested on GPU 0, and the renormalized predictions and
    observations are saved.

    :param args: command-line args merged into the global ``cfg``
    """
    update_cfg(cfg, args)
    cache = cfg.CACHE.STATE
    train_mode = cfg.TRAIN_MODE
    test_epoch = cfg.TEST_EPOCH
    config_data = GagesConfig(cfg)
    # the level-II ecoregion codes to iterate over
    eco_names = [("ECO2_CODE", 5.2), ("ECO2_CODE", 5.3), ("ECO2_CODE", 6.2),
                 ("ECO2_CODE", 7.1), ("ECO2_CODE", 8.1), ("ECO2_CODE", 8.2),
                 ("ECO2_CODE", 8.3), ("ECO2_CODE", 8.4), ("ECO2_CODE", 8.5),
                 ("ECO2_CODE", 9.2), ("ECO2_CODE", 9.3), ("ECO2_CODE", 9.4),
                 ("ECO2_CODE", 9.5), ("ECO2_CODE", 9.6), ("ECO2_CODE", 10.1),
                 ("ECO2_CODE", 10.2), ("ECO2_CODE", 10.4),
                 ("ECO2_CODE", 11.1), ("ECO2_CODE", 12.1),
                 ("ECO2_CODE", 13.1)]
    quick_data_dir = os.path.join(config_data.data_path["DB"], "quickdata")
    data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0")
    # cached CONUS data models for the training and test periods
    data_model_train = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='data_source.txt',
        stat_file_name='Statistics.json',
        flow_file_name='flow.npy',
        forcing_file_name='forcing.npy',
        attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json',
        var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    data_model_test = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    for eco_name in eco_names:
        # NOTE(review): config_data is re-bound to the subdir config at the
        # end of each iteration, so from the 2nd ecoregion onward this
        # screening uses the previous iteration's config -- presumably only
        # the output subdir differs; confirm set_subdir has no other effect
        source_data = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            ecoregion=eco_name)
        sites_id = source_data.all_configs['flow_screen_gage_id']
        sites_id_inter = np.intersect1d(data_model_train.t_s_dict["sites_id"],
                                        sites_id)
        # skip ecoregions with no basins in the cached CONUS set
        if sites_id_inter.size < 1:
            continue
        # write this ecoregion's outputs into its own subdirectory
        config_data = GagesConfig.set_subdir(cfg, str(eco_name[1]))
        gages_model_train = GagesModel.update_data_model(
            config_data,
            data_model_train,
            sites_id_update=sites_id,
            data_attr_update=True,
            screen_basin_area_huc4=False)
        # the test model normalizes with the training statistics
        gages_model_test = GagesModel.update_data_model(
            config_data,
            data_model_test,
            sites_id_update=sites_id,
            data_attr_update=True,
            train_stat_dict=gages_model_train.stat_dict,
            screen_basin_area_huc4=False)
        if cache:
            save_datamodel(gages_model_train,
                           data_source_file_name='data_source.txt',
                           stat_file_name='Statistics.json',
                           flow_file_name='flow',
                           forcing_file_name='forcing',
                           attr_file_name='attr',
                           f_dict_file_name='dictFactorize.json',
                           var_dict_file_name='dictAttribute.json',
                           t_s_dict_file_name='dictTimeSpace.json')
            save_datamodel(gages_model_test,
                           data_source_file_name='test_data_source.txt',
                           stat_file_name='test_Statistics.json',
                           flow_file_name='test_flow',
                           forcing_file_name='test_forcing',
                           attr_file_name='test_attr',
                           f_dict_file_name='test_dictFactorize.json',
                           var_dict_file_name='test_dictAttribute.json',
                           t_s_dict_file_name='test_dictTimeSpace.json')
            print("save ecoregion " + str(eco_name[1]) + " data model")
        with torch.cuda.device(0):
            if train_mode:
                master_train(gages_model_train)
            pred, obs = master_test(gages_model_test, epoch=test_epoch)
            # renormalize outputs using basin area and mean precipitation
            # (to_norm=False inverts the basin normalization)
            basin_area = gages_model_test.data_source.read_attr(
                gages_model_test.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                is_return_dict=False)
            mean_prep = gages_model_test.data_source.read_attr(
                gages_model_test.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                is_return_dict=False)
            mean_prep = mean_prep / 365 * 10
            pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
            obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
            save_result(
                gages_model_test.data_source.data_config.data_path['Temp'],
                test_epoch, pred, obs)
def test_siminv_data_temp(self):
    """Prepare and cache the data models for the simulate-inverse setup.

    The cached CONUS basins are split into no-major-dam basins (natural-flow
    LSTM) and major-dam basins (second LSTM); train/test data models are
    built for both groups and serialized to subdirs "1" and "2".
    """
    cached_dir = os.path.join(
        os.path.join(self.config_data_natflow.data_path["DB"], "quickdata"),
        "conus-all_90-10_nan-0.0_00-1.0")

    def _load(prefix):
        # read one cached data model ('' -> training period, 'test_' -> test)
        return GagesModel.load_datamodel(
            cached_dir,
            data_source_file_name=prefix + 'data_source.txt',
            stat_file_name=prefix + 'Statistics.json',
            flow_file_name=prefix + 'flow.npy',
            forcing_file_name=prefix + 'forcing.npy',
            attr_file_name=prefix + 'attr.npy',
            f_dict_file_name=prefix + 'dictFactorize.json',
            var_dict_file_name=prefix + 'dictAttribute.json',
            t_s_dict_file_name=prefix + 'dictTimeSpace.json')

    model_train_period = _load('')
    model_test_period = _load('test_')
    conus_ids = model_train_period.t_s_dict["sites_id"]

    def _screened_ids(major_dam_num):
        # basins passing the major-dam screen, restricted to the CONUS set
        src = GagesSource.choose_some_basins(
            self.config_data_natflow,
            self.config_data_natflow.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            major_dam_num=major_dam_num)
        return np.intersect1d(conus_ids,
                              src.all_configs['flow_screen_gage_id'])

    nomajordam_in_conus = _screened_ids(0)
    majordam_in_conus = _screened_ids([1, 2000])

    natflow_train = GagesModel.update_data_model(
        self.config_data_natflow,
        model_train_period,
        sites_id_update=nomajordam_in_conus,
        data_attr_update=True,
        screen_basin_area_huc4=False)
    natflow_test = GagesModel.update_data_model(
        self.config_data_natflow,
        model_test_period,
        sites_id_update=nomajordam_in_conus,
        data_attr_update=True,
        train_stat_dict=natflow_train.stat_dict,
        screen_basin_area_huc4=False)
    lstm_train = GagesModel.update_data_model(
        self.config_data_lstm,
        model_train_period,
        sites_id_update=majordam_in_conus,
        data_attr_update=True,
        screen_basin_area_huc4=False)
    lstm_test = GagesModel.update_data_model(
        self.config_data_lstm,
        model_test_period,
        sites_id_update=majordam_in_conus,
        data_attr_update=True,
        train_stat_dict=lstm_train.stat_dict,
        screen_basin_area_huc4=False)

    def _save(model, subdir, prefix):
        # serialize one data model into the given subdirectory
        save_datamodel(model, subdir,
                       data_source_file_name=prefix + 'data_source.txt',
                       stat_file_name=prefix + 'Statistics.json',
                       flow_file_name=prefix + 'flow',
                       forcing_file_name=prefix + 'forcing',
                       attr_file_name=prefix + 'attr',
                       f_dict_file_name=prefix + 'dictFactorize.json',
                       var_dict_file_name=prefix + 'dictAttribute.json',
                       t_s_dict_file_name=prefix + 'dictTimeSpace.json')

    _save(natflow_train, "1", '')
    _save(natflow_test, "1", 'test_')
    _save(lstm_train, "2", '')
    _save(lstm_test, "2", 'test_')
    print("read and save data model")
def dam_lstm(args):
    """Train and evaluate an LSTM on basins that have at least one dam and
    also pass the configured DOR screen; save renormalized predictions and
    observations for the test period.

    :param args: command-line args merged into the global ``cfg``
    """
    update_cfg(cfg, args)
    seed = cfg.RANDOM_SEED
    eval_epoch = cfg.TEST_EPOCH
    device_id = cfg.CTX
    do_train = cfg.TRAIN_MODE
    dor_threshold = cfg.GAGES.attrScreenParams.DOR
    use_cache = cfg.CACHE.STATE
    print("train and test in basins with dams: \n")
    conf = GagesConfig(cfg)
    train_range = conf.model_dict["data"]["tRangeTrain"]
    # basins passing the DOR screen
    dor_ids = GagesSource.choose_some_basins(
        conf,
        train_range,
        screen_basin_area_huc4=False,
        DOR=dor_threshold).all_configs['flow_screen_gage_id']
    # basins with at least one dam
    dammed_ids = GagesSource.choose_some_basins(
        conf,
        train_range,
        screen_basin_area_huc4=False,
        dam_num=[1, 100000]).all_configs['flow_screen_gage_id']
    chosen_ids = np.intersect1d(np.array(dor_ids),
                                np.array(dammed_ids)).tolist()
    models = GagesModels(conf,
                         screen_basin_area_huc4=False,
                         sites_id=chosen_ids)
    train_model = models.data_model_train
    test_model = models.data_model_test
    if use_cache:
        save_datamodel(train_model,
                       data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json',
                       flow_file_name='flow',
                       forcing_file_name='forcing',
                       attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(test_model,
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
    with torch.cuda.device(device_id):
        if do_train:
            master_train(train_model, random_seed=seed)
        pred, obs = master_test(test_model, epoch=eval_epoch)
        # invert the basin normalization using area and mean precipitation
        site_ids = test_model.t_s_dict["sites_id"]
        basin_area = test_model.data_source.read_attr(site_ids,
                                                      ['DRAIN_SQKM'],
                                                      is_return_dict=False)
        mean_prep = test_model.data_source.read_attr(site_ids,
                                                     ['PPTAVG_BASIN'],
                                                     is_return_dict=False)
        mean_prep = mean_prep / 365 * 10
        pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
        obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
        save_result(test_model.data_source.data_config.data_path['Temp'],
                    eval_epoch, pred, obs)
def test_some_reservoirs(self): print("train and test in basins with different combination: \n") dam_plan = self.dam_plan config_data = self.config_data test_epoch = self.test_epoch if dam_plan == 2: dam_num = 0 dor = self.config_file.GAGES.attrScreenParams.DOR source_data_dor1 = GagesSource.choose_some_basins( config_data, config_data.model_dict["data"]["tRangeTrain"], screen_basin_area_huc4=False, DOR=dor) # basins with dams source_data_withoutdams = GagesSource.choose_some_basins( config_data, config_data.model_dict["data"]["tRangeTrain"], screen_basin_area_huc4=False, dam_num=dam_num) sites_id_dor1 = source_data_dor1.all_configs['flow_screen_gage_id'] sites_id_withoutdams = source_data_withoutdams.all_configs[ 'flow_screen_gage_id'] sites_id_chosen = np.sort( np.union1d(np.array(sites_id_dor1), np.array(sites_id_withoutdams))).tolist() elif dam_plan == 3: dam_num = [1, 100000] # basins with dams source_data_withdams = GagesSource.choose_some_basins( config_data, config_data.model_dict["data"]["tRangeTrain"], screen_basin_area_huc4=False, dam_num=dam_num) sites_id_chosen = source_data_withdams.all_configs[ 'flow_screen_gage_id'] else: print("wrong choice") sites_id_chosen = None gages_model = GagesModels(config_data, screen_basin_area_huc4=False, sites_id=sites_id_chosen) gages_model_train = gages_model.data_model_train gages_model_test = gages_model.data_model_test if self.cache: save_datamodel(gages_model_train, data_source_file_name='data_source.txt', stat_file_name='Statistics.json', flow_file_name='flow', forcing_file_name='forcing', attr_file_name='attr', f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json', t_s_dict_file_name='dictTimeSpace.json') save_datamodel(gages_model_test, data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json', flow_file_name='test_flow', forcing_file_name='test_forcing', attr_file_name='test_attr', f_dict_file_name='test_dictFactorize.json', 
var_dict_file_name='test_dictAttribute.json', t_s_dict_file_name='test_dictTimeSpace.json') with torch.cuda.device(self.gpu_num): if self.train_mode: master_train(gages_model_train, random_seed=self.random_seed) pred, obs = master_test(gages_model_test, epoch=test_epoch) basin_area = gages_model_test.data_source.read_attr( gages_model_test.t_s_dict["sites_id"], ['DRAIN_SQKM'], is_return_dict=False) mean_prep = gages_model_test.data_source.read_attr( gages_model_test.t_s_dict["sites_id"], ['PPTAVG_BASIN'], is_return_dict=False) mean_prep = mean_prep / 365 * 10 pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False) obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False) save_result( gages_model_test.data_source.data_config.data_path['Temp'], test_epoch, pred, obs)
def test_gages_data_model(self):
    """Build (or subset from the quick-data cache) train/test data models
    for basins having 1-200 major dams, then optionally serialize them."""
    config_data = self.config_data
    major_dam_num = [1, 200]  # max major dam num is 155
    if cfg.CACHE.QUICK_DATA:
        # screen the basins, then subset the cached CONUS data models
        source_data = GagesSource.choose_some_basins(
            config_data,
            config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            major_dam_num=major_dam_num)
        sites_id = source_data.all_configs['flow_screen_gage_id']
        print("The binary data has exsited")
        quick_data_dir = os.path.join(self.config_data.data_path["DB"],
                                      "quickdata")
        # data_dir = os.path.join(quick_data_dir, "conus-all_85-05_nan-0.1_00-1.0")
        data_dir = os.path.join(quick_data_dir,
                                "conus-all_90-10_nan-0.0_00-1.0")
        data_model_train = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='data_source.txt',
            stat_file_name='Statistics.json',
            flow_file_name='flow.npy',
            forcing_file_name='forcing.npy',
            attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json',
            var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model_test = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        gages_model_train = GagesModel.update_data_model(
            self.config_data,
            data_model_train,
            sites_id_update=sites_id,
            screen_basin_area_huc4=False)
        # the test model reuses the training statistics for normalization
        gages_model_test = GagesModel.update_data_model(
            self.config_data,
            data_model_test,
            sites_id_update=sites_id,
            train_stat_dict=gages_model_train.stat_dict,
            screen_basin_area_huc4=False)
    else:
        # no quick data: build both data models from scratch
        gages_model = GagesModels(config_data,
                                  screen_basin_area_huc4=False,
                                  major_dam_num=major_dam_num)
        gages_model_train = gages_model.data_model_train
        gages_model_test = gages_model.data_model_test
    if cfg.CACHE.STATE:
        save_datamodel(gages_model_train,
                       data_source_file_name='data_source.txt',
                       stat_file_name='Statistics.json',
                       flow_file_name='flow',
                       forcing_file_name='forcing',
                       attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json',
                       var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        save_datamodel(gages_model_test,
                       data_source_file_name='test_data_source.txt',
                       stat_file_name='test_Statistics.json',
                       flow_file_name='test_flow',
                       forcing_file_name='test_forcing',
                       attr_file_name='test_attr',
                       f_dict_file_name='test_dictFactorize.json',
                       var_dict_file_name='test_dictAttribute.json',
                       t_s_dict_file_name='test_dictTimeSpace.json')
    print("read and save data model")
def test_some_reservoirs(self):
    """Choose small (low-DOR) reservoir basins for the 2nd LSTM, not for
    the simulator, and cache the four resulting data models.

    The natural-flow models (subdir "1") use all no-major-dam CONUS basins;
    the 2nd-LSTM models (subdir "2") use major-dam basins restricted to the
    low-DOR subset.
    """
    # read the model configuration file
    config_data = self.config_data_lstm
    # according to paper "High-resolution mapping of the world's reservoirs and dams for sustainable river-flow management"
    dor = 0.02
    source_data = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        DOR=dor)
    sites_id_dor = source_data.all_configs['flow_screen_gage_id']
    quick_data_dir = os.path.join(self.config_data_lstm.data_path["DB"],
                                  "quickdata")
    data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0")
    # cached CONUS data models: 8595 = training period, 9505 = test period
    data_model_8595 = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='data_source.txt',
        stat_file_name='Statistics.json',
        flow_file_name='flow.npy',
        forcing_file_name='forcing.npy',
        attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json',
        var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    data_model_9505 = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    conus_sites_id_all = data_model_8595.t_s_dict["sites_id"]
    nomajordam_source_data = GagesSource.choose_some_basins(
        self.config_data_natflow,
        self.config_data_natflow.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        major_dam_num=0)
    nomajordam_sites_id = nomajordam_source_data.all_configs[
        'flow_screen_gage_id']
    # In no major dam case, all sites are chosen as natural flow generator
    nomajordam_in_conus = np.intersect1d(conus_sites_id_all,
                                         nomajordam_sites_id)
    # only the major-dam group is additionally restricted to low-DOR basins
    conus_sites_id_dor = np.intersect1d(conus_sites_id_all, sites_id_dor)
    majordam_source_data = GagesSource.choose_some_basins(
        self.config_data_natflow,
        self.config_data_natflow.model_dict["data"]["tRangeTrain"],
        screen_basin_area_huc4=False,
        major_dam_num=[1, 2000])
    majordam_sites_id = majordam_source_data.all_configs[
        'flow_screen_gage_id']
    majordam_in_conus = np.intersect1d(conus_sites_id_dor, majordam_sites_id)
    gages_model_train_natflow = GagesModel.update_data_model(
        self.config_data_natflow,
        data_model_8595,
        sites_id_update=nomajordam_in_conus,
        data_attr_update=True,
        screen_basin_area_huc4=False)
    # test models normalize with the corresponding training statistics
    gages_model_test_natflow = GagesModel.update_data_model(
        self.config_data_natflow,
        data_model_9505,
        sites_id_update=nomajordam_in_conus,
        data_attr_update=True,
        train_stat_dict=gages_model_train_natflow.stat_dict,
        screen_basin_area_huc4=False)
    gages_model_train_lstm = GagesModel.update_data_model(
        self.config_data_lstm,
        data_model_8595,
        sites_id_update=majordam_in_conus,
        data_attr_update=True,
        screen_basin_area_huc4=False)
    gages_model_test_lstm = GagesModel.update_data_model(
        self.config_data_lstm,
        data_model_9505,
        sites_id_update=majordam_in_conus,
        data_attr_update=True,
        train_stat_dict=gages_model_train_lstm.stat_dict,
        screen_basin_area_huc4=False)
    # natural-flow models go to subdir "1"
    save_datamodel(gages_model_train_natflow, "1",
                   data_source_file_name='data_source.txt',
                   stat_file_name='Statistics.json',
                   flow_file_name='flow',
                   forcing_file_name='forcing',
                   attr_file_name='attr',
                   f_dict_file_name='dictFactorize.json',
                   var_dict_file_name='dictAttribute.json',
                   t_s_dict_file_name='dictTimeSpace.json')
    save_datamodel(gages_model_test_natflow, "1",
                   data_source_file_name='test_data_source.txt',
                   stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow',
                   forcing_file_name='test_forcing',
                   attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json',
                   var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    # 2nd-LSTM models go to subdir "2"
    save_datamodel(gages_model_train_lstm, "2",
                   data_source_file_name='data_source.txt',
                   stat_file_name='Statistics.json',
                   flow_file_name='flow',
                   forcing_file_name='forcing',
                   attr_file_name='attr',
                   f_dict_file_name='dictFactorize.json',
                   var_dict_file_name='dictAttribute.json',
                   t_s_dict_file_name='dictTimeSpace.json')
    save_datamodel(gages_model_test_lstm, "2",
                   data_source_file_name='test_data_source.txt',
                   stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow',
                   forcing_file_name='test_forcing',
                   attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json',
                   var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    print("read and save data model")