def test_plot_ecdf_together(self):
    """Overlay the ECDF curves of two experiments in one figure.

    For each experiment the stored result of epoch 300 is loaded, reduced to
    two dimensions, scored with ``statError`` and the metric named by
    ``self.keys[0]`` is turned into an ECDF curve.
    """
    epoch = 300
    # experiment name -> legend entry (insertion order is the plotting order)
    legend_by_exp = {
        "basic_exp37": "random_1234",
        "basic_exp39": "random_123",
    }
    curve_xs, curve_ys = [], []
    for exp_name in legend_by_exp:
        exp_config = load_dataconfig_case_exp(cfg, exp_name)
        sim, ref = load_result(exp_config.data_path['Temp'], epoch)
        # keep only the first two axes of the stored arrays
        sim = sim.reshape(sim.shape[0], sim.shape[1])
        ref = ref.reshape(ref.shape[0], ref.shape[1])
        metrics = statError(ref, sim)
        curve_x, curve_y = ecdf(metrics[self.keys[0]])
        curve_xs.append(curve_x)
        curve_ys.append(curve_y)
    plot_ecdfs(curve_xs,
               curve_ys,
               list(legend_by_exp.values()),
               x_str="NSE",
               y_str="CDF")
    plt.show()
def test_plot_ecdf_matplotlib(self):
    """Draw the ECDF curves of five experiments with ``plot_ecdfs_matplot``.

    Each curve is described by one row of ``curve_specs``; the last
    experiment is drawn grey and dashed, the others solid.
    """
    test_epoch = 300
    # (experiment, legend entry, line colour, dashed?)
    curve_specs = [
        ("basic_exp37", "random_1234", "#1f77b4", False),
        ("basic_exp39", "random_123", "#ff7f0e", False),
        ("basic_exp40", "random_12345", "#2ca02c", False),
        ("basic_exp41", "random_111", "#d62728", False),
        ("basic_exp42", "random_1111", "grey", True),
    ]
    xs, ys = [], []
    for exp_name, _legend, _colour, _dashed in curve_specs:
        exp_config = load_dataconfig_case_exp(cfg, exp_name)
        pred, obs = load_result(exp_config.data_path['Temp'], test_epoch)
        pred = pred.reshape(pred.shape[0], pred.shape[1])
        obs = obs.reshape(obs.shape[0], obs.shape[1])
        metrics = statError(obs, pred)
        x, y = ecdf(metrics[self.keys[0]])
        xs.append(x)
        ys.append(y)
    plot_ecdfs_matplot(xs,
                       ys,
                       [spec[1] for spec in curve_specs],
                       colors=[spec[2] for spec in curve_specs],
                       dash_lines=[spec[3] for spec in curve_specs],
                       x_str="NSE",
                       y_str="CDF")
    plt.show()
def test_ensemble_results_plot_box(self):
    """Box-plot Bias, RMSE and NSE of the mean over six experiments.

    Predictions and observations of every member experiment are averaged
    element-wise before ``statError`` is evaluated.
    """
    # a two-member alternative was ["basic_exp11", "basic_exp17"]
    member_exps = [
        "basic_exp12", "basic_exp13", "basic_exp14", "basic_exp15",
        "basic_exp16", "basic_exp18"
    ]
    member_preds, member_obss = [], []
    for exp_name in member_exps:
        exp_config = load_dataconfig_case_exp(exp_name)
        pred_i, obs_i = load_result(exp_config.data_path['Temp'],
                                    self.test_epoch)
        pred_i = pred_i.reshape(pred_i.shape[0], pred_i.shape[1])
        obs_i = obs_i.reshape(obs_i.shape[0], obs_i.shape[1])
        print(obs_i)
        member_preds.append(pred_i)
        member_obss.append(obs_i)
    # average across the member axis (axis 0 of the stacked arrays)
    pred_mean = np.mean(np.array(member_preds), axis=0)
    obs_mean = np.mean(np.array(member_obss), axis=0)
    metrics = statError(obs_mean, pred_mean)
    shown = subset_of_dict(metrics, ["Bias", "RMSE", "NSE"])
    plot_diff_boxes(shown)
def setUp(self):
    """Prepare the fixtures used to analyse the result of a model.

    Loads the configuration and the stored test data model of experiment
    ``basic_exp37``, selects the attribute names kept in ``self.attr_lst``,
    and evaluates ``statError`` on the epoch-300 result.

    Sets ``exp_num``, ``config_data``, ``test_epoch``, ``data_model``,
    ``attr_lst``, ``pred``, ``obs`` and ``inds_df`` on the instance.
    """
    self.exp_num = "basic_exp37"
    self.config_data = load_dataconfig_case_exp(cfg, self.exp_num)
    self.test_epoch = 300
    self.data_model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')

    # Attribute names by group. Only the last four groups feed
    # ``self.attr_lst``; the others stay available for swapping in, e.g.
    # ``self.attr_lst = attr_groups["hydro_mod_other"]``.
    attr_groups = {
        "basin": ['ELEV_MEAN_M_BASIN', 'SLOPE_PCT', 'DRAIN_SQKM'],
        "landcover": [
            'FORESTNLCD06', 'BARRENNLCD06', 'DECIDNLCD06', 'EVERGRNLCD06',
            'MIXEDFORNLCD06', 'SHRUBNLCD06', 'GRASSNLCD06',
            'WOODYWETNLCD06', 'EMERGWETNLCD06'
        ],
        "soil": ['ROCKDEPAVE', 'AWCAVE', 'PERMAVE', 'RFACT'],
        "geol": [
            'GEOL_REEDBUSH_DOM', 'GEOL_REEDBUSH_DOM_PCT',
            'GEOL_REEDBUSH_SITE'
        ],
        "hydro": [
            'STREAMS_KM_SQ_KM', 'STRAHLER_MAX', 'MAINSTEM_SINUOUSITY',
            'REACHCODE', 'ARTIFPATH_PCT', 'ARTIFPATH_MAINSTEM_PCT',
            'HIRES_LENTIC_PCT', 'BFI_AVE', 'PERDUN', 'PERHOR', 'TOPWET',
            'CONTACT'
        ],
        "hydro_mod_dams": [
            'NDAMS_2009', 'STOR_NOR_2009', 'RAW_AVG_DIS_ALL_MAJ_DAMS'
        ],
        "hydro_mod_other": [
            'CANALS_PCT', 'RAW_AVG_DIS_ALLCANALS', 'NPDES_MAJ_DENS',
            'RAW_AVG_DIS_ALL_MAJ_NPDES', 'FRESHW_WITHDRAWAL',
            'PCT_IRRIG_AG', 'POWER_SUM_MW'
        ],
        "landscape_pat": ['FRAGUN_BASIN'],
        "lc06_basin": ['DEVNLCD06', 'FORESTNLCD06', 'PLANTNLCD06'],
        "pop_infrastr": ['ROADS_KM_SQ_KM'],
        "prot_areas": ['PADCAT1_PCT_BASIN', 'PADCAT2_PCT_BASIN'],
    }
    self.attr_lst = (attr_groups["landscape_pat"] +
                     attr_groups["lc06_basin"] +
                     attr_groups["pop_infrastr"] +
                     attr_groups["prot_areas"])

    pred, obs = load_result(
        self.data_model.data_source.data_config.data_path['Temp'],
        self.test_epoch)
    self.pred = pred.reshape(pred.shape[0], pred.shape[1])
    # Reshape the observations by their own dimensions; the earlier code
    # borrowed pred's shape, which would silently rearrange ``obs`` if the
    # two arrays were ever laid out differently.
    self.obs = obs.reshape(obs.shape[0], obs.shape[1])
    inds = statError(self.obs, self.pred)
    self.inds_df = pd.DataFrame(inds)
def test_plot_loss_from_log(self):
    """Print the summed last field of the run log from row 340 onward.

    The log ``340epoch_run.csv`` is read from the "Out" directory of
    experiment ``basic_exp50``. The last space-separated token of the first
    column of every row is parsed as a float; the sum is divided by 60 and
    printed with a " mins" label.
    """
    exp_name = "basic_exp50"
    exp_config = load_dataconfig_case_exp(exp_name)
    log_path = os.path.join(exp_config.data_path["Out"], "340epoch_run.csv")
    log_table = pd.read_csv(log_path, header=None)
    first_column = log_table.iloc[:, 0].values
    durations = np.array(
        [float(entry.rsplit(" ", 1)[-1]) for entry in first_column])
    total = np.sum(durations[340:]) / 60
    print("time: ", str(total), " mins")
def test_ensemble_results(self):
    """Map and plot time series for the mean over six experiments.

    Member predictions/observations are averaged element-wise, scored with
    ``statError``, and the sites whose NSE lies in ``[0, 1)`` are handed to
    ``plot_gages_map_and_ts``.
    """
    # a two-member alternative was ["basic_exp11", "basic_exp17"]
    member_exps = [
        "basic_exp12", "basic_exp13", "basic_exp14", "basic_exp15",
        "basic_exp16", "basic_exp18"
    ]
    member_preds, member_obss = [], []
    for exp_name in member_exps:
        exp_config = load_dataconfig_case_exp(exp_name)
        pred_i, obs_i = load_result(exp_config.data_path['Temp'],
                                    self.test_epoch)
        member_preds.append(pred_i.reshape(pred_i.shape[0],
                                           pred_i.shape[1]))
        member_obss.append(obs_i.reshape(obs_i.shape[0], obs_i.shape[1]))
    pred_mean = np.mean(np.array(member_preds), axis=0)
    obs_mean = np.mean(np.array(member_obss), axis=0)
    inds_df = pd.DataFrame(statError(obs_mean, pred_mean))

    data_model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')

    # plot map ts
    show_ind_key = 'NSE'
    # positions of all sites; built here but not passed on below
    all_positions = np.arange(len(data_model.t_s_dict["sites_id"])).tolist()
    # other ranges tried: [0.5, 1], [-10000, 1], [-10000, 0]
    lower, upper = 0, 1
    scores = inds_df[show_ind_key]
    within_range = (scores >= lower) & (scores < upper)
    idx_lst_nse = inds_df[within_range].index.tolist()
    plot_gages_map_and_ts(data_model,
                          obs_mean,
                          pred_mean,
                          inds_df,
                          show_ind_key,
                          idx_lst_nse,
                          pertile_range=[0, 100])
def test_plot_ecdf_together(self):
    """Plot the NSE ECDF curves of ``basic_exp38`` and ``warmup_exp1``."""
    # experiment name -> legend entry
    legend_by_exp = {
        "basic_exp38": "without_warmup",
        "warmup_exp1": "with_warmup",
    }
    curve_xs, curve_ys = [], []
    for exp_name in legend_by_exp:
        exp_config = load_dataconfig_case_exp(exp_name)
        sim, ref = load_result(exp_config.data_path['Temp'],
                               self.test_epoch)
        sim = sim.reshape(sim.shape[0], sim.shape[1])
        ref = ref.reshape(ref.shape[0], ref.shape[1])
        metrics = statError(ref, sim)
        curve_x, curve_y = ecdf(metrics["NSE"])
        curve_xs.append(curve_x)
        curve_ys.append(curve_y)
    plot_ecdfs(curve_xs, curve_ys, list(legend_by_exp.values()))
def test_plot_map_cartopy_multi_vars(self):
    """Map and scatter NSE for "zero-dor" and "small-dor" sites.

    Sites of ``basic_exp37`` are split into those screened with
    ``dam_num=0`` ("zero-dor") and those present in both the ``DOR=-0.02``
    and ``dam_num=[1, 10000]`` screenings ("small-dor"). Only sites whose
    NSE lies in ``[0, 1)`` are passed to ``plot_gages_map_and_scatter``.
    """
    conus_exps = ["basic_exp37"]
    dor_1 = -0.02
    test_epoch = 300
    show_ind_key = "NSE"
    nse_range = [0, 1]
    attr_lst = ["SLOPE_PCT", "ELEV_MEAN_M_BASIN"]

    config_data = load_dataconfig_case_exp(cfg, conus_exps[0])
    train_range = config_data.model_dict["data"]["tRangeTrain"]
    source_data_dor1 = GagesSource.choose_some_basins(
        config_data, train_range, screen_basin_area_huc4=False, DOR=dor_1)
    # basins with dams
    source_data_withdams = GagesSource.choose_some_basins(
        config_data,
        train_range,
        screen_basin_area_huc4=False,
        dam_num=[1, 10000])
    # basins without dams
    source_data_withoutdams = GagesSource.choose_some_basins(
        config_data, train_range, screen_basin_area_huc4=False, dam_num=0)
    id_key = 'flow_screen_gage_id'
    sites_id_dor1 = source_data_dor1.all_configs[id_key]
    sites_id_withdams = source_data_withdams.all_configs[id_key]
    sites_id_nodam = source_data_withoutdams.all_configs[id_key]
    sites_id_smalldam = np.intersect1d(
        np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()

    data_model = GagesModel.load_datamodel(
        config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    all_lat = data_model.data_source.gage_dict["LAT_GAGE"]
    all_lon = data_model.data_source.gage_dict["LNG_GAGE"]
    conus_sites = data_model.t_s_dict["sites_id"]

    def positions_of(member_ids):
        # positions in conus_sites whose site id belongs to member_ids
        return [
            pos for pos in range(len(conus_sites))
            if conus_sites[pos] in member_ids
        ]

    idx_lst_nodam_in_conus = positions_of(sites_id_nodam)
    idx_lst_smalldam_in_conus = positions_of(sites_id_smalldam)

    attrs = data_model.data_source.read_attr(conus_sites,
                                             attr_lst,
                                             is_return_dict=False)
    inds_df, _pred, _obs = load_ensemble_result(cfg,
                                                conus_exps,
                                                test_epoch,
                                                return_value=True)
    scores = inds_df[show_ind_key]
    in_range = (scores >= nse_range[0]) & (scores < nse_range[1])
    idx_lst_nse = inds_df[in_range].index.tolist()
    type_1_index_lst = np.intersect1d(idx_lst_nodam_in_conus,
                                      idx_lst_nse).tolist()
    type_2_index_lst = np.intersect1d(idx_lst_smalldam_in_conus,
                                      idx_lst_nse).tolist()

    def type_frame(label, rows):
        # one table per site type; column order matches the plot call below
        return pd.DataFrame({
            "type": np.full(len(rows), label),
            show_ind_key: inds_df[show_ind_key].values[rows],
            "lat": all_lat[rows],
            "lon": all_lon[rows],
            "slope": attrs[rows, 0],
            "elevation": attrs[rows, 1]
        })

    data_df = pd.concat([
        type_frame("zero-dor", type_1_index_lst),
        type_frame("small-dor", type_2_index_lst),
    ])
    n_type1 = len(type_1_index_lst)
    n_type2 = len(type_2_index_lst)
    # row positions of each type inside the concatenated table
    idx_lst = [np.arange(n_type1), np.arange(n_type1, n_type1 + n_type2)]
    plot_gages_map_and_scatter(data_df,
                               [show_ind_key, "lat", "lon", "elevation"],
                               idx_lst,
                               cmap_strs=["Reds", "Blues"],
                               labels=["zero-dor", "small-dor"],
                               scatter_label=[attr_lst[1], show_ind_key])
    plt.tight_layout()
    plt.show()
# NOTE(review): this run of statements has no visible enclosing definition;
# `cfg`, `pair1_exps`, `pair2_exps`, `pair3_exps` and `conus_exps` are not
# assigned here and are assumed to be defined earlier in the file.

# Experiment names grouped as "small dam" and "large dam" runs.
smalldam_exp_lst = [
    "dam_exp39", "dam_exp42", "dam_exp45", "dam_exp48", "dam_exp51",
    "dam_exp54"
]
largedam_exp_lst = [
    "dam_exp38", "dam_exp41", "dam_exp44", "dam_exp47", "dam_exp50",
    "dam_exp53"
]
dor_cutoff = 0.1
test_epoch = 300
FIGURE_DPI = 600
# nodam_config_data = load_dataconfig_case_exp(nodam_exp_lst[0])
# smalldam_config_data = load_dataconfig_case_exp(smalldam_exp_lst[0])
# largedam_config_data = load_dataconfig_case_exp(largedam_exp_lst[0])
# Load the configuration of the first experiment of every group.
pair1_config_data = load_dataconfig_case_exp(cfg, pair1_exps[0])
pair2_config_data = load_dataconfig_case_exp(cfg, pair2_exps[0])
pair3_config_data = load_dataconfig_case_exp(cfg, pair3_exps[0])
conus_config_data = load_dataconfig_case_exp(cfg, conus_exps[0])
# Stored test data model of the first `conus_exps` experiment and its site ids.
conus_data_model = GagesModel.load_datamodel(
    conus_config_data.data_path["Temp"],
    data_source_file_name='test_data_source.txt',
    stat_file_name='test_Statistics.json',
    flow_file_name='test_flow.npy',
    forcing_file_name='test_forcing.npy',
    attr_file_name='test_attr.npy',
    f_dict_file_name='test_dictFactorize.json',
    var_dict_file_name='test_dictAttribute.json',
    t_s_dict_file_name='test_dictTimeSpace.json')
conus_sites = conus_data_model.t_s_dict["sites_id"]
def test_plot_each_symmetric_exp(self):
    """Box-plot one metric for every symmetric experiment and test set.

    For each experiment in ``self.symmetric_exp_lst`` the results of all
    ``self.split_num`` folds are loaded in three flavours (files suffixed
    ``_base``, unsuffixed, and ``_2``), stacked row-wise over the folds and
    scored with ``statError``. The metric ``self.show_ind_key`` is then
    grouped by training-set name (x axis) and test-set name (hue).

    Reads ``train_set``, ``test_set``, ``show_ind_key``, ``test_epoch``,
    ``split_num``, ``symmetric_exp_lst``, ``symmetric_train_data_name_lst``
    and ``symmetric_test_data_name_lst`` from the instance.
    """
    train_set = self.train_set
    test_set = self.test_set
    show_ind_key = self.show_ind_key
    test_epoch = self.test_epoch
    split_num = self.split_num
    exp_lst = self.symmetric_exp_lst
    train_data_name_lst = self.symmetric_train_data_name_lst
    test_data_name_lst = self.symmetric_test_data_name_lst
    colors = "Greens"

    # (file-name suffix, tag written to the log) for the three result sets
    flavours = (("_base", "Train-base"), ("", "PUB-1"), ("_2", "PUB-2"))

    def load_fold(config_data, fold, suffix):
        """Return the 2-D (pred, obs) arrays of one fold and flavour."""
        data_model = GagesModel.load_datamodel(
            config_data.data_path["Temp"],
            str(fold),
            data_source_file_name='test_data_source' + suffix + '.txt',
            stat_file_name='test_Statistics' + suffix + '.json',
            flow_file_name='test_flow' + suffix + '.npy',
            forcing_file_name='test_forcing' + suffix + '.npy',
            attr_file_name='test_attr' + suffix + '.npy',
            f_dict_file_name='test_dictFactorize' + suffix + '.json',
            var_dict_file_name='test_dictAttribute' + suffix + '.json',
            t_s_dict_file_name='test_dictTimeSpace' + suffix + '.json')
        temp_dir = data_model.data_source.data_config.data_path['Temp']
        if suffix:
            pred, obs = load_result(temp_dir,
                                    test_epoch,
                                    pred_name='flow_pred' + suffix,
                                    obs_name='flow_obs' + suffix)
        else:
            # the unsuffixed result relies on load_result's default names
            pred, obs = load_result(temp_dir, test_epoch)
        return (pred.reshape(pred.shape[0], pred.shape[1]),
                obs.reshape(obs.shape[0], obs.shape[1]))

    def labelled(inds_df, train_name, test_name):
        """Attach constant train/test labels to the shown metric column."""
        n_rows = inds_df.shape[0]
        return pd.DataFrame({
            train_set: np.full([n_rows], train_name),
            test_set: np.full([n_rows], test_name),
            show_ind_key: inds_df[show_ind_key]
        })

    sns.set(font_scale=1)
    fig = plt.figure()
    # Figure.add_axes() without a rect returns None on old Matplotlib and
    # raises on current releases; a single full-figure subplot is the same
    # axes seaborn used to fall back to via plt.gca().
    ax_k = fig.add_subplot(111)

    frames = []
    for j, exp_name in enumerate(exp_lst):
        config_data = load_dataconfig_case_exp(cfg, exp_name)
        collected = {suffix: ([], []) for suffix, _ in flavours}
        for i in range(split_num):
            for suffix, tag in flavours:
                pred, obs = load_fold(config_data, i, suffix)
                hydro_logger.info("the size of %s %s %s %s", j, i, tag,
                                  pred.shape[0])
                collected[suffix][0].append(pred)
                collected[suffix][1].append(obs)
        # stack the folds row-wise, then score each flavour
        metrics = {
            suffix: pd.DataFrame(
                statError(np.vstack(obss), np.vstack(preds)))
            for suffix, (preds, obss) in collected.items()
        }

        # only the first two experiments contribute their base result
        if j in (0, 1):
            frames.append(
                labelled(metrics["_base"], train_data_name_lst[j],
                         test_data_name_lst[j]))
        # the second experiment has its two test sets swapped
        if j == 1:
            first_test, second_test = (test_data_name_lst[3],
                                       test_data_name_lst[2])
        else:
            first_test, second_test = (test_data_name_lst[2],
                                       test_data_name_lst[3])
        frames.append(
            labelled(metrics[""], train_data_name_lst[j], first_test))
        frames.append(
            labelled(metrics["_2"], train_data_name_lst[j], second_test))

    result = pd.concat(frames)
    sns_box = sns.boxplot(ax=ax_k,
                          x=train_set,
                          y=show_ind_key,
                          hue=test_set,
                          data=result,
                          showfliers=False,
                          palette=colors)
    medians = result.groupby([train_set, test_set],
                             sort=False)[show_ind_key].median().values
    hydro_logger.info(medians)
    create_median_labels(sns_box.axes, has_fliers=False)
    sns.despine()
    plt.tight_layout()
    plt.show()
    hydro_logger.debug("plot successfully")
def test_zero_small_dor_basins_locations(self):
    """Export and plot western "zero-dor" and "small-dor" sites.

    Sites are kept when their longitude is below -100 and their NSE lies in
    ``[0, 1)``. "zero-dor" sites come from the ``dam_num=0`` screening;
    "small-dor" sites are in both the ``DOR=-self.dor`` and the
    ``dam_num=[1, 10000]`` screenings. The site ids of both types are
    written to CSV files in the "Out" directory, then mapped and scattered
    against slope.
    """
    conus_exps = self.exp_lst
    test_epoch = self.test_epoch
    show_ind_key = "NSE"
    attr_lst = ["SLOPE_PCT", "ELEV_MEAN_M_BASIN"]
    nse_range = [0, 1]

    inds_df, _pred, _obs = load_ensemble_result(self.config_file,
                                                conus_exps,
                                                test_epoch,
                                                return_value=True)
    conus_config_data = load_dataconfig_case_exp(self.config_file,
                                                 conus_exps[0])
    conus_data_model = GagesModel.load_datamodel(
        conus_config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    conus_sites = conus_data_model.t_s_dict["sites_id"]
    all_lat = conus_data_model.data_source.gage_dict["LAT_GAGE"]
    all_lon = conus_data_model.data_source.gage_dict["LNG_GAGE"]
    attrs = conus_data_model.data_source.read_attr(conus_sites,
                                                   attr_lst,
                                                   is_return_dict=False)

    # positions of the sites lying west of longitude -100
    western_lon_idx = [
        pos for pos in range(all_lon.size) if all_lon[pos] < -100
    ]
    scores = inds_df[show_ind_key]
    in_range = (scores >= nse_range[0]) & (scores < nse_range[1])
    idx_lst_nse = np.intersect1d(western_lon_idx,
                                 inds_df[in_range].index.tolist())

    train_range = conus_config_data.model_dict["data"]["tRangeTrain"]
    # small dor
    source_data_dor1 = GagesSource.choose_some_basins(
        conus_config_data,
        train_range,
        screen_basin_area_huc4=False,
        DOR=-self.dor)
    # basins with dams
    source_data_withdams = GagesSource.choose_some_basins(
        conus_config_data,
        train_range,
        screen_basin_area_huc4=False,
        dam_num=[1, 10000])
    # basins without dams
    source_data_withoutdams = GagesSource.choose_some_basins(
        conus_config_data,
        train_range,
        screen_basin_area_huc4=False,
        dam_num=0)
    id_key = 'flow_screen_gage_id'
    sites_id_dor1 = source_data_dor1.all_configs[id_key]
    sites_id_withdams = source_data_withdams.all_configs[id_key]
    sites_id_nodam = source_data_withoutdams.all_configs[id_key]
    sites_id_smalldam = np.intersect1d(
        np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()

    def positions_of(member_ids):
        # positions in conus_sites whose site id belongs to member_ids
        return [
            pos for pos in range(len(conus_sites))
            if conus_sites[pos] in member_ids
        ]

    type_1_index_lst = np.intersect1d(positions_of(sites_id_nodam),
                                      idx_lst_nse).tolist()
    type_2_index_lst = np.intersect1d(positions_of(sites_id_smalldam),
                                      idx_lst_nse).tolist()

    out_dir = conus_config_data.data_path["Out"]
    zero_dor_ids = pd.DataFrame(
        {"GAGE_ID": np.array(conus_sites)[type_1_index_lst]})
    zero_dor_ids.to_csv(os.path.join(out_dir, "western-zero-dor-sites.csv"))
    small_dor_ids = pd.DataFrame(
        {"GAGE_ID": np.array(conus_sites)[type_2_index_lst]})
    small_dor_ids.to_csv(
        os.path.join(out_dir, "western-small-dor-sites.csv"))

    def type_frame(label, rows):
        # one table per site type; column names are used by the plot call
        return pd.DataFrame({
            "type": np.full(len(rows), label),
            show_ind_key: inds_df[show_ind_key].values[rows],
            "lat": all_lat[rows],
            "lon": all_lon[rows],
            "slope": attrs[rows, 0],
            "elevation": attrs[rows, 1]
        })

    data_df = pd.concat([
        type_frame("zero-dor", type_1_index_lst),
        type_frame("small-dor", type_2_index_lst),
    ])
    n_type1 = len(type_1_index_lst)
    n_type2 = len(type_2_index_lst)
    # row positions of each type inside the concatenated table
    idx_lst = [np.arange(n_type1), np.arange(n_type1, n_type1 + n_type2)]
    plot_gages_map_and_scatter(data_df,
                               [show_ind_key, "lat", "lon", "slope"],
                               idx_lst,
                               cmap_strs=["Reds", "Blues"],
                               labels=["zero-dor", "small-dor"],
                               scatter_label=[attr_lst[0], show_ind_key],
                               wspace=2,
                               hspace=1.5,
                               legend_y=.8,
                               sub_fig_ratio=[6, 4, 1])
    plt.tight_layout()
    plt.show()
def test_diff_dor(self):
    """Compare NSE box plots per basin class across training mixtures.

    Three panels are drawn side by side, one per basin class ("zero_dor",
    "small_dor", "large_dor"). Each panel shows the experiment trained on
    that class alone next to the matching sites taken from two "pair"
    experiments and from the CONUS experiment. The median of every box is
    written just above its median line.
    """
    test_epoch = self.test_epoch
    config_file = self.config_file
    dor_1 = -self.dor
    dor_2 = self.dor

    # experiments whose sites are subset per class (kept in loading order)
    mixed_exps = {
        "pair1": ["dam_exp1"],
        "pair2": ["nodam_exp7"],
        "pair3": ["dam_exp27"],
        "conus": ["basic_exp37"],
    }
    # experiments trained on a single class
    nodam_exp_lst = ["nodam_exp1"]
    # original note: -0.003["dam_exp11"] -0.08["dam_exp17"] -1["dam_exp32"]
    smalldam_exp_lst = ["dam_exp17"]
    # original note: 0.003["dam_exp12"] 0.08["dam_exp18"] 1["dam_exp33"]
    largedam_exp_lst = ["dam_exp4"]

    def load_sites(config_data):
        # site ids of the stored test data model of one experiment
        data_model = GagesModel.load_datamodel(
            config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        return data_model.t_s_dict["sites_id"]

    configs = {
        name: load_dataconfig_case_exp(config_file, exps[0])
        for name, exps in mixed_exps.items()
    }
    conus_config_data = configs["conus"]
    sites = {"conus": load_sites(conus_config_data)}

    def screened_ids(**criteria):
        # gage ids passing one screening of the CONUS configuration
        source = GagesSource.choose_some_basins(
            conus_config_data,
            conus_config_data.model_dict["data"]["tRangeTrain"],
            screen_basin_area_huc4=False,
            **criteria)
        return source.all_configs['flow_screen_gage_id']

    sites_id_dor1 = screened_ids(DOR=dor_1)
    sites_id_largedam = screened_ids(DOR=dor_2)
    # basins with dams
    sites_id_withdams = screened_ids(dam_num=[1, 10000])
    # basins without dams
    sites_id_nodam = screened_ids(dam_num=0)
    sites_id_smalldam = np.intersect1d(
        np.array(sites_id_dor1), np.array(sites_id_withdams)).tolist()
    class_ids = {
        "nodam": sites_id_nodam,
        "smalldam": sites_id_smalldam,
        "largedam": sites_id_largedam,
    }

    for name in ("pair1", "pair2", "pair3"):
        sites[name] = load_sites(configs[name])

    # (class, experiment) -> positions of that class inside the experiment
    positions = {
        (cls, name): [
            pos for pos in range(len(sites[name]))
            if sites[name][pos] in member_ids
        ]
        for cls, member_ids in class_ids.items()
        for name in ("pair1", "pair2", "pair3", "conus")
    }

    print("multi box")
    inds_by_exp = {
        name: load_ensemble_result(config_file, exps, test_epoch)
        for name, exps in mixed_exps.items()
    }

    plt.figure(figsize=(15, 8))
    gs = gridspec.GridSpec(1, 3)
    keys_nse = "NSE"
    color_chosen = ["Greens", "Blues", "Reds"]
    median_loc = 0.015
    decimal_places = 2
    sns.despine()
    sns.set(font_scale=1.5)

    def draw_panel(slot, cls, attr, legends, alone_exps, members,
                   clear_ylabel):
        # first box: the experiment trained on this class alone
        alone_df = load_ensemble_result(config_file, alone_exps, test_epoch)
        frames = [
            pd.DataFrame({
                attr: np.full([alone_df.shape[0]], legends[0]),
                keys_nse: alone_df[keys_nse]
            })
        ]
        # remaining boxes: this class's sites inside the mixed experiments
        for legend, name in zip(legends[1:], members):
            subset = inds_by_exp[name][keys_nse].iloc[positions[(cls,
                                                                 name)]]
            frames.append(
                pd.DataFrame({
                    attr: np.full([subset.shape[0]], legend),
                    keys_nse: subset
                }))
        result = pd.concat(frames)

        ax = plt.subplot(gs[slot])
        ax.set_xticklabels(ax.get_xticklabels(), rotation=30)
        ax.set_ylim([0, 1])
        if clear_ylabel:
            ax.set(ylabel=None)
        sns.boxplot(ax=ax,
                    x=attr,
                    y=keys_nse,
                    data=result,
                    showfliers=False,
                    palette=color_chosen[slot])
        medians = result.groupby([attr],
                                 sort=False)[keys_nse].median().values
        median_labels = [str(np.round(m, decimal_places)) for m in medians]
        pos = range(len(medians))
        # zip caps the annotations at the number of tick labels
        for tick, _label in zip(pos, ax.get_xticklabels()):
            ax.text(pos[tick],
                    medians[tick] + median_loc,
                    median_labels[tick],
                    horizontalalignment='center',
                    size='x-small',
                    weight='semibold')

    draw_panel(0, "nodam", "zero_dor",
               ["LSTM-Z", "LSTM-ZS", "LSTM-ZL", "LSTM-CONUS"],
               nodam_exp_lst, ("pair1", "pair2", "conus"), False)
    draw_panel(1, "smalldam", "small_dor",
               ["LSTM-S", "LSTM-ZS", "LSTM-SL", "LSTM-CONUS"],
               smalldam_exp_lst, ("pair1", "pair3", "conus"), True)
    draw_panel(2, "largedam", "large_dor",
               ["LSTM-L", "LSTM-ZL", "LSTM-SL", "LSTM-CONUS"],
               largedam_exp_lst, ("pair2", "pair3", "conus"), True)
    plt.tight_layout()
    plt.show()
def test_stor_seperate(self):
    """Plot four NSE ECDF curves for small- and large-storage basins.

    The first two curves (style "together") are built from the test result
    of experiment ``basic/exp18``, restricted to the sites returned by
    ``GagesSource.choose_some_basins`` for the STORAGE ranges ``[0, 50]`` and
    ``[50, 15000]``. The last two curves (style "separate") use the complete
    results of experiments ``dam_exp12`` and ``dam_exp11``. Results are read
    at epoch ``self.test_epoch`` and drawn with one ``plot_ecdfs`` call.
    """
    config_file = os.path.join(definitions.CONFIG_DIR,
                               "basic/config_exp18.ini")
    config_data = GagesConfig.set_subdir(config_file, r"basic/exp18")

    # File names of the cached test-period data model.
    test_file_names = {
        'data_source_file_name': 'test_data_source.txt',
        'stat_file_name': 'test_Statistics.json',
        'flow_file_name': 'test_flow.npy',
        'forcing_file_name': 'test_forcing.npy',
        'attr_file_name': 'test_attr.npy',
        'f_dict_file_name': 'test_dictFactorize.json',
        'var_dict_file_name': 'test_dictAttribute.json',
        't_s_dict_file_name': 'test_dictTimeSpace.json',
    }
    data_model = GagesModel.load_datamodel(config_data.data_path["Temp"],
                                           **test_file_names)
    all_sites = data_model.t_s_dict["sites_id"]

    # Storage intervals used to screen basins; max is 14348.6581036888
    storage_ranges = [[0, 50], [50, 15000]]
    train_range = config_data.model_dict["data"]["tRangeTrain"]
    screened_sources = [
        GagesSource.choose_some_basins(config_data, train_range,
                                       STORAGE=storage_range)
        for storage_range in storage_ranges
    ]

    # Positions (within all_sites) of the basins kept for each interval.
    site_index_groups = []
    for source in screened_sources:
        chosen_ids = source.all_configs['flow_screen_gage_id']
        site_index_groups.append(
            [pos for pos, site in enumerate(all_sites) if site in chosen_ids])

    pred, obs = load_result(
        data_model.data_source.data_config.data_path['Temp'],
        self.test_epoch)
    # Both arrays are reshaped to the first two dimensions of pred.
    n_rows, n_cols = pred.shape[0], pred.shape[1]
    pred = pred.reshape(n_rows, n_cols)
    obs = obs.reshape(n_rows, n_cols)

    keys_nse = "NSE"
    nse_all = pd.DataFrame(statError(obs, pred))[keys_nse]

    xs = []
    ys = []
    # "together" curves: subsets of the single exp18 result.
    for site_indices in site_index_groups:
        x_vals, y_vals = ecdf(nse_all.iloc[site_indices])
        xs.append(x_vals)
        ys.append(y_vals)

    # "separate" curves: one experiment per storage class.
    # Previously tried sets: dam_exp4-6 and dam_exp1-3.
    for case_exp in ("dam_exp12", "dam_exp11"):
        # NOTE(review): other call sites in this file pass a cfg object as
        # the first argument -- confirm the one-argument form is still valid.
        config_data_i = load_dataconfig_case_exp(case_exp)
        pred_i, obs_i = load_result(config_data_i.data_path['Temp'],
                                    self.test_epoch)
        pred_i = pred_i.reshape(pred_i.shape[0], pred_i.shape[1])
        obs_i = obs_i.reshape(obs_i.shape[0], obs_i.shape[1])
        x_vals, y_vals = ecdf(statError(obs_i, pred_i)[keys_nse])
        xs.append(x_vals)
        ys.append(y_vals)

    legends_together = ["small_stor", "large_stor"]
    legends_separate = ["small_stor", "large_stor"]
    plot_ecdfs(xs,
               ys,
               legends_together + legends_separate,
               style=["together", "together", "separate", "separate"])
gpu_attr_lst = [0, 0, 0, 0, 0, 0] dor_cutoff = 0.1 # exp_lst = ["basic_exp37"] # exp_attr_lst = ["basic_exp2"] # gpu_lst = [0] # gpu_attr_lst = [0] doLst = list() # doLst.append('train') # doLst.append('test') doLst.append('post') test_epoch = 300 FIGURE_DPI = 600 # test if 'test' in doLst: for i in range(len(exp_lst)): config_data = load_dataconfig_case_exp(cfg, exp_lst[i]) quick_data_dir = os.path.join(config_data.data_path["DB"], "quickdata") data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0") data_model_train = GagesModel.load_datamodel( data_dir, data_source_file_name='data_source.txt', stat_file_name='Statistics.json', flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy', f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json', t_s_dict_file_name='dictTimeSpace.json') data_model_test = GagesModel.load_datamodel( data_dir,
"PUB-l"]] # ["Train-c", "PUB-c", "PUB-n"] test_epoch = 300 FIGURE_DPI = 600 split_num = 2 # camels_pub_split_num = 12 camels_pub_split_num = 2 # test doLst = list() # doLst.append('train') # doLst.append('test') doLst.append('post') if 'test' in doLst: zerodor_config_data = load_dataconfig_case_exp( cfg, camels_pub_on_diff_dor_exp_lst[0]) quick_data_dir = os.path.join(zerodor_config_data.data_path["DB"], "quickdata") data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0") data_model_train = GagesModel.load_datamodel( data_dir, data_source_file_name='data_source.txt', stat_file_name='Statistics.json', flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy', f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json', t_s_dict_file_name='dictTimeSpace.json') data_model_test = GagesModel.load_datamodel( data_dir,
import matplotlib.pyplot as plt

# Experiment ids used by this script; the first entry of each list is the one
# whose data config is loaded below.
conus_exps = [
    "basic_exp37",
    "basic_exp39",
    "basic_exp40",
    "basic_exp41",
    "basic_exp42",
    "basic_exp43",
]
exp_lst = [
    "basic_exp31",
    "basic_exp32",
    "basic_exp33",
    "basic_exp34",
    "basic_exp35",
    "basic_exp36",
]
gpu_lst = [0] * 6

# Stages to run. Only 'cache' is checked here and it is currently disabled.
doLst = []
# doLst.append('cache')

test_epoch = 300
FIGURE_DPI = 600

all_config_Data = load_dataconfig_case_exp(cfg, conus_exps[0])
config_data = load_dataconfig_case_exp(cfg, exp_lst[0])

if 'cache' in doLst:
    # Load the training data model from the quickdata directory under "DB".
    quick_data_dir = os.path.join(all_config_Data.data_path["DB"],
                                  "quickdata")
    data_dir = os.path.join(quick_data_dir,
                            "conus-all_90-10_nan-0.0_00-1.0")
    train_file_names = {
        'data_source_file_name': 'data_source.txt',
        'stat_file_name': 'Statistics.json',
        'flow_file_name': 'flow.npy',
        'forcing_file_name': 'forcing.npy',
        'attr_file_name': 'attr.npy',
        'f_dict_file_name': 'dictFactorize.json',
        'var_dict_file_name': 'dictAttribute.json',
        't_s_dict_file_name': 'dictTimeSpace.json',
    }
    data_model_train = GagesModel.load_datamodel(data_dir,
                                                 **train_file_names)
hydro_logger.info("Are dor values of those basins bigger than 1: %s",
                  paper_dors > 1)

test_epoch = 300
all_exps_lst = [
    "basic_exp39", "basic_exp37", "basic_exp40",
    "basic_exp41", "basic_exp42", "basic_exp43",
    "basic_exp32", "basic_exp31", "basic_exp33",
    "basic_exp34", "basic_exp35", "basic_exp36",
]
# Random seed label logged next to each experiment; same order as above.
all_exps_random_seeds = [
    "123", "1234", "12345", "111", "1111", "11111",
    "123", "1234", "12345", "111", "1111", "11111",
]

# Log the median NSE of every experiment, each loaded as a one-member list.
for exp_tmp, seed_tmp in zip(all_exps_lst, all_exps_random_seeds):
    inds_df_tmp = load_ensemble_result(cfg, [exp_tmp], test_epoch)
    hydro_logger.info(
        "The median NSE value of %s is %.2f (random seed: %s)",
        exp_tmp, inds_df_tmp.median()["NSE"], seed_tmp)

exp_lst = ["basic_exp37"]
config_data = load_dataconfig_case_exp(cfg, exp_lst[0])

# File names of the cached test-period data model.
test_file_names = {
    'data_source_file_name': 'test_data_source.txt',
    'stat_file_name': 'test_Statistics.json',
    'flow_file_name': 'test_flow.npy',
    'forcing_file_name': 'test_forcing.npy',
    'attr_file_name': 'test_attr.npy',
    'f_dict_file_name': 'test_dictFactorize.json',
    'var_dict_file_name': 'test_dictAttribute.json',
    't_s_dict_file_name': 'test_dictTimeSpace.json',
}
data_model = GagesModel.load_datamodel(config_data.data_path["Temp"],
                                       **test_file_names)

# Gauge ids are read as strings from the ';'-separated CAMELS name file.
camels_gageid_file = os.path.join(config_data.data_path["DB"],
                                  "camels_attributes_v2.0",
                                  "camels_attributes_v2.0",
                                  "camels_name.txt")
gauge_df = pd.read_csv(camels_gageid_file,
                       dtype={"gauge_id": str},
                       sep=';')
gauge_list = gauge_df["gauge_id"].values

# show the relationship between NSE and some attrs
attr_lst_shown = [
    "NDAMS_2009",
    "STOR_NOR_2009",
    "RAW_DIS_NEAREST_MAJ_DAM",
    "RAW_AVG_DIS_ALLDAMS",
    "FRESHW_WITHDRAWAL",
    "PCT_IRRIG_AG",
    "POWER_SUM_MW",
    "PDEN_2000_BLOCK",
    "ROADS_KM_SQ_KM",
    "IMPNLCD06",
]