def test_comp_result(self):
    """Compare PUB test NSE ECDFs between no-major-dam and major-dam basins.

    For each split, loads the two saved test data models, computes per-site
    error metrics, and overlays the two NSE ECDF curves in one plot.
    """
    for i in range(self.split_num):
        # data model for the no-major-dam basins of split i
        data_model = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"], str(i),
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        # data model for the major-dam basins of split i
        data_model_majordam = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"], str(i),
            data_source_file_name='test_data_source_majordam.txt',
            stat_file_name='test_Statistics_majordam.json',
            flow_file_name='test_flow_majordam.npy',
            forcing_file_name='test_forcing_majordam.npy',
            attr_file_name='test_attr_majordam.npy',
            f_dict_file_name='test_dictFactorize_majordam.json',
            var_dict_file_name='test_dictAttribute_majordam.json',
            t_s_dict_file_name='test_dictTimeSpace_majordam.json')
        pred, obs = load_result(
            data_model.data_source.data_config.data_path['Temp'],
            self.test_epoch)
        # flatten to 2-D (site x time); assumes a trailing singleton dim — TODO confirm
        pred = pred.reshape(pred.shape[0], pred.shape[1])
        obs = obs.reshape(obs.shape[0], obs.shape[1])
        inds = statError(obs, pred)
        inds['STAID'] = data_model.t_s_dict["sites_id"]
        inds_df = pd.DataFrame(inds)
        # results of the major-dam group are saved under distinct array names
        pred_majordam, obs_majordam = load_result(
            data_model_majordam.data_source.data_config.data_path['Temp'],
            self.test_epoch,
            pred_name='flow_pred_majordam',
            obs_name='flow_obs_majordam')
        pred_majordam = pred_majordam.reshape(pred_majordam.shape[0],
                                              pred_majordam.shape[1])
        obs_majordam = obs_majordam.reshape(obs_majordam.shape[0],
                                            obs_majordam.shape[1])
        inds_majordam = statError(obs_majordam, pred_majordam)
        inds_majordam['STAID'] = data_model_majordam.t_s_dict["sites_id"]
        inds_majordam_df = pd.DataFrame(inds_majordam)
        keys_nse = "NSE"
        xs = []
        ys = []
        cases_exps_legends_together = [
            "PUB_test_in_no-major-dam_basins", "PUB_test_in_major-dam_basins"
        ]
        x1, y1 = ecdf(inds_df[keys_nse])
        xs.append(x1)
        ys.append(y1)
        x2, y2 = ecdf(inds_majordam_df[keys_nse])
        xs.append(x2)
        ys.append(y2)
        # one figure per split with both curves overlaid
        plot_ecdfs(xs, ys, cases_exps_legends_together)
def test_plot_ecdf_matplotlib(self):
    """Plot NSE ECDF curves of five random-seed experiments via matplotlib."""
    experiment_ids = [
        "basic_exp37", "basic_exp39", "basic_exp40", "basic_exp41",
        "basic_exp42"
    ]
    legend_names = [
        "random_1234", "random_123", "random_12345", "random_111",
        "random_1111"
    ]
    epoch = 300
    cdf_xs, cdf_ys = [], []
    for exp_id in experiment_ids:
        exp_config = load_dataconfig_case_exp(cfg, exp_id)
        flow_pred, flow_obs = load_result(exp_config.data_path['Temp'], epoch)
        flow_pred = flow_pred.reshape(flow_pred.shape[0], flow_pred.shape[1])
        flow_obs = flow_obs.reshape(flow_obs.shape[0], flow_obs.shape[1])
        metrics = statError(flow_obs, flow_pred)
        x_vals, y_vals = ecdf(metrics[self.keys[0]])
        cdf_xs.append(x_vals)
        cdf_ys.append(y_vals)
    # the last curve is drawn dashed and grey; the others solid and colored
    plot_ecdfs_matplot(
        cdf_xs,
        cdf_ys,
        legend_names,
        colors=["#1f77b4", "#ff7f0e", "#2ca02c", "#d62728", "grey"],
        dash_lines=[False, False, False, False, True],
        x_str="NSE",
        y_str="CDF")
    plt.show()
def test_plot_cases(self):
    """Print median/mean test metrics for each dam-main-purpose basin group.

    For every unique main purpose in the NID dam dictionary, load that
    group's saved test data model, read its stored predictions and
    observations, and print the median and mean of all error metrics.
    """
    # NID (dam inventory) metadata lives in a sibling directory of DB
    nid_dir = os.path.join(
        "/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid",
        "quickdata")
    gage_main_dam_purpose = unserialize_json(
        os.path.join(nid_dir, "dam_main_purpose_dict.json"))
    gage_main_dam_purpose_lst = list(gage_main_dam_purpose.values())
    gage_main_dam_purpose_unique = np.unique(gage_main_dam_purpose_lst)
    for i in range(0, gage_main_dam_purpose_unique.size):
        data_model = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"],
            gage_main_dam_purpose_unique[i],
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        # each purpose group keeps its results in its own sub-directory
        new_temp_dir = os.path.join(
            data_model.data_source.data_config.model_dict["dir"]["Temp"],
            gage_main_dam_purpose_unique[i])
        new_out_dir = os.path.join(
            data_model.data_source.data_config.model_dict["dir"]["Out"],
            gage_main_dam_purpose_unique[i])
        data_model.update_datamodel_dir(new_temp_dir, new_out_dir)
        pred, obs = load_result(new_temp_dir, self.test_epoch)
        pred = pred.reshape(pred.shape[0], pred.shape[1])
        # BUG FIX: reshape obs by its own dimensions (was pred.shape, which
        # only happens to work when both arrays share identical dims)
        obs = obs.reshape(obs.shape[0], obs.shape[1])
        inds = statError(obs, pred)
        inds_df = pd.DataFrame(inds)
        print(gage_main_dam_purpose_unique[i])
        print(inds_df.median(axis=0))
        print(inds_df.mean(axis=0))
def test_ensemble_results_plot_box(self):
    """Box-plot Bias/RMSE/NSE of the ensemble mean over six experiments.

    Averages predictions and observations element-wise across the listed
    experiments, computes error metrics on the means, and draws box plots.
    """
    preds = []
    obss = []
    # cases_exps = ["basic_exp11", "basic_exp17"]
    cases_exps = [
        "basic_exp12", "basic_exp13", "basic_exp14", "basic_exp15",
        "basic_exp16", "basic_exp18"
    ]
    for case_exp in cases_exps:
        # NOTE(review): other methods in this file call
        # load_dataconfig_case_exp(cfg, exp) — confirm the expected signature.
        config_data_i = load_dataconfig_case_exp(case_exp)
        pred_i, obs_i = load_result(config_data_i.data_path['Temp'],
                                    self.test_epoch)
        pred_i = pred_i.reshape(pred_i.shape[0], pred_i.shape[1])
        obs_i = obs_i.reshape(obs_i.shape[0], obs_i.shape[1])
        # FIX: removed leftover debug `print(obs_i)` that dumped whole arrays
        preds.append(pred_i)
        obss.append(obs_i)
    preds_np = np.array(preds)
    obss_np = np.array(obss)
    # ensemble mean across experiments (axis 0 = experiment)
    pred_mean = np.mean(preds_np, axis=0)
    obs_mean = np.mean(obss_np, axis=0)
    inds = statError(obs_mean, pred_mean)
    keys = ["Bias", "RMSE", "NSE"]
    inds_test = subset_of_dict(inds, keys)
    box_fig = plot_diff_boxes(inds_test)
def test_plot_ecdf_together(self):
    """Overlay NSE ECDF curves of two random-seed experiments in one figure."""
    exp_ids = ["basic_exp37", "basic_exp39"]
    legends = ["random_1234", "random_123"]
    epoch = 300
    curve_xs, curve_ys = [], []
    for exp_id in exp_ids:
        conf_i = load_dataconfig_case_exp(cfg, exp_id)
        p_i, o_i = load_result(conf_i.data_path['Temp'], epoch)
        p_i = p_i.reshape(p_i.shape[0], p_i.shape[1])
        o_i = o_i.reshape(o_i.shape[0], o_i.shape[1])
        metrics_i = statError(o_i, p_i)
        cx, cy = ecdf(metrics_i[self.keys[0]])
        curve_xs.append(cx)
        curve_ys.append(cy)
    plot_ecdfs(curve_xs, curve_ys, legends, x_str="NSE", y_str="CDF")
    plt.show()
def test_sim_plot(self):
    """Evaluate saved LSTM test results and draw box/ts/ecdf/map figures.

    Loads test data model "2", computes per-site error metrics from the
    stored predictions and observations, writes them to data_df.csv, and
    saves box-plot and time-series figures plus an NSE ECDF and a site map.
    """
    data_model2 = GagesModel.load_datamodel(
        self.config_data_lstm.data_path["Temp"], "2",
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    pred, obs = load_result(
        data_model2.data_source.data_config.data_path['Temp'],
        self.test_epoch)
    # flatten to 2-D (site x time); assumes a trailing singleton dim — TODO confirm
    pred = pred.reshape(pred.shape[0], pred.shape[1])
    obs = obs.reshape(obs.shape[0], obs.shape[1])
    inds = statError(obs, pred)
    inds['STAID'] = data_model2.t_s_dict["sites_id"]
    inds_df = pd.DataFrame(inds)
    inds_df.to_csv(
        os.path.join(self.config_data_lstm.data_path["Out"], 'data_df.csv'))
    # plot box, using the seaborn library
    keys = ["Bias", "RMSE", "NSE"]
    inds_test = subset_of_dict(inds, keys)
    box_fig = plot_diff_boxes(inds_test)
    box_fig.savefig(
        os.path.join(self.config_data_lstm.data_path["Out"], "box_fig.png"))
    # plot ts
    show_me_num = 5
    t_s_dict = data_model2.t_s_dict
    sites = np.array(t_s_dict["sites_id"])
    t_range = np.array(t_s_dict["t_final_range"])
    time_seq_length = data_model2.data_source.data_config.model_dict[
        'model']['seqLength']
    # shift plot start forward by seqLength - 1 days — presumably the first
    # seqLength - 1 days are consumed as model warm-up input; confirm
    time_start = np.datetime64(t_range[0]) + np.timedelta64(
        time_seq_length - 1, 'D')
    t_range[0] = np.datetime_as_string(time_start, unit='D')
    ts_fig = plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num)
    ts_fig.savefig(
        os.path.join(self.config_data_lstm.data_path["Out"], "ts_fig.png"))
    # plot nse ecdf
    sites_df_nse = pd.DataFrame({
        "sites": sites,
        keys[2]: inds_test[keys[2]]
    })
    plot_ecdf(sites_df_nse, keys[2])
    # plot map
    gauge_dict = data_model2.data_source.gage_dict
    # NOTE(review): sites_df_nse's id column is named "sites" but id_col is
    # "STAID" — verify plot_map resolves ids via gauge_dict, not the dataframe
    plot_map(gauge_dict,
             sites_df_nse,
             id_col="STAID",
             lon_col="LNG_GAGE",
             lat_col="LAT_GAGE")
def setUp(self):
    """Analyze result of model: load config, test data model, GAGES-II
    attribute groups, and precompute per-site test metrics."""
    self.exp_num = "basic_exp37"
    self.config_data = load_dataconfig_case_exp(cfg, self.exp_num)
    self.test_epoch = 300
    self.data_model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    # GAGES-II basin attribute groups; only one combination is analyzed,
    # the rest are kept so the selection can be switched easily
    attrBasin = ['ELEV_MEAN_M_BASIN', 'SLOPE_PCT', 'DRAIN_SQKM']
    attrLandcover = [
        'FORESTNLCD06', 'BARRENNLCD06', 'DECIDNLCD06', 'EVERGRNLCD06',
        'MIXEDFORNLCD06', 'SHRUBNLCD06', 'GRASSNLCD06', 'WOODYWETNLCD06',
        'EMERGWETNLCD06'
    ]
    attrSoil = ['ROCKDEPAVE', 'AWCAVE', 'PERMAVE', 'RFACT']
    attrGeol = [
        'GEOL_REEDBUSH_DOM', 'GEOL_REEDBUSH_DOM_PCT', 'GEOL_REEDBUSH_SITE'
    ]
    attrHydro = [
        'STREAMS_KM_SQ_KM', 'STRAHLER_MAX', 'MAINSTEM_SINUOUSITY',
        'REACHCODE', 'ARTIFPATH_PCT', 'ARTIFPATH_MAINSTEM_PCT',
        'HIRES_LENTIC_PCT', 'BFI_AVE', 'PERDUN', 'PERHOR', 'TOPWET',
        'CONTACT'
    ]
    attrHydroModDams = [
        'NDAMS_2009', 'STOR_NOR_2009', 'RAW_AVG_DIS_ALL_MAJ_DAMS'
    ]
    attrHydroModOther = [
        'CANALS_PCT', 'RAW_AVG_DIS_ALLCANALS', 'NPDES_MAJ_DENS',
        'RAW_AVG_DIS_ALL_MAJ_NPDES', 'FRESHW_WITHDRAWAL', 'PCT_IRRIG_AG',
        'POWER_SUM_MW'
    ]
    attrLandscapePat = ['FRAGUN_BASIN']
    attrLC06Basin = ['DEVNLCD06', 'FORESTNLCD06', 'PLANTNLCD06']
    attrPopInfrastr = ['ROADS_KM_SQ_KM']
    attrProtAreas = ['PADCAT1_PCT_BASIN', 'PADCAT2_PCT_BASIN']
    self.attr_lst = attrLandscapePat + attrLC06Basin + attrPopInfrastr + attrProtAreas
    # self.attr_lst = attrHydroModOther
    # precompute metrics once so every test can reuse self.inds_df
    pred, obs = load_result(
        self.data_model.data_source.data_config.data_path['Temp'],
        self.test_epoch)
    self.pred = pred.reshape(pred.shape[0], pred.shape[1])
    # BUG FIX: reshape obs by its own dimensions (was pred.shape, which only
    # works when both arrays happen to have identical dims)
    self.obs = obs.reshape(obs.shape[0], obs.shape[1])
    inds = statError(self.obs, self.pred)
    self.inds_df = pd.DataFrame(inds)
def test_ensemble_results(self):
    """Plot a map plus time series for the ensemble mean of six experiments.

    Averages predictions/observations across the listed runs, computes
    metrics, and plots sites whose NSE falls in the selected range.
    """
    preds = []
    obss = []
    # cases_exps = ["basic_exp11", "basic_exp17"]
    cases_exps = [
        "basic_exp12", "basic_exp13", "basic_exp14", "basic_exp15",
        "basic_exp16", "basic_exp18"
    ]
    for case_exp in cases_exps:
        # NOTE(review): other methods pass `cfg` as the first argument to
        # load_dataconfig_case_exp — confirm which signature is current.
        config_data_i = load_dataconfig_case_exp(case_exp)
        pred_i, obs_i = load_result(config_data_i.data_path['Temp'],
                                    self.test_epoch)
        # flatten to 2-D (site x time); assumes a trailing singleton dim — TODO confirm
        pred_i = pred_i.reshape(pred_i.shape[0], pred_i.shape[1])
        obs_i = obs_i.reshape(obs_i.shape[0], obs_i.shape[1])
        preds.append(pred_i)
        obss.append(obs_i)
    preds_np = np.array(preds)
    obss_np = np.array(obss)
    # ensemble mean across experiments (axis 0 = experiment)
    pred_mean = np.mean(preds_np, axis=0)
    obs_mean = np.mean(obss_np, axis=0)
    inds = statError(obs_mean, pred_mean)
    inds_df = pd.DataFrame(inds)
    data_model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    # plot map ts
    show_ind_key = 'NSE'
    idx_lst = np.arange(len(data_model.t_s_dict["sites_id"])).tolist()
    # alternative NSE windows kept for quick switching:
    # nse_range = [0.5, 1]
    nse_range = [0, 1]
    # nse_range = [-10000, 1]
    # nse_range = [-10000, 0]
    # row indices of sites whose NSE lies inside the half-open range
    idx_lst_nse = inds_df[(inds_df[show_ind_key] >= nse_range[0]) & (
        inds_df[show_ind_key] < nse_range[1])].index.tolist()
    plot_gages_map_and_ts(data_model,
                          obs_mean,
                          pred_mean,
                          inds_df,
                          show_ind_key,
                          idx_lst_nse,
                          pertile_range=[0, 100])
def test_plot_ecdf_together(self):
    """Compare the NSE ECDF of a no-warmup run against a with-warmup run."""
    experiments = ["basic_exp38", "warmup_exp1"]
    labels = ["without_warmup", "with_warmup"]
    all_x, all_y = [], []
    for exp_name in experiments:
        exp_conf = load_dataconfig_case_exp(exp_name)
        p_arr, o_arr = load_result(exp_conf.data_path['Temp'],
                                   self.test_epoch)
        p_arr = p_arr.reshape(p_arr.shape[0], p_arr.shape[1])
        o_arr = o_arr.reshape(o_arr.shape[0], o_arr.shape[1])
        metrics = statError(o_arr, p_arr)
        curve_x, curve_y = ecdf(metrics["NSE"])
        all_x.append(curve_x)
        all_y.append(curve_y)
    plot_ecdfs(all_x, all_y, labels)
def test_plot_pretrained_model_test(self):
    """Plot box/ts/ecdf/map results of a no-dam-pretrained model tested on
    major-dam basins.

    Loads the major-dam test data model, reads the pretrained model's saved
    predictions from its output directory, and saves the figures there.
    """
    data_model_test = GagesModel.load_datamodel(
        self.majordam_config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    # results live in a subdir named after the no-major-dam source experiment
    pretrained_model_name = "nodam" + self.nomajordam_subdir.split(
        "/")[1] + "_pretrained_model"
    save_dir = os.path.join(
        data_model_test.data_source.data_config.data_path['Out'],
        pretrained_model_name)
    pred, obs = load_result(save_dir, self.test_epoch)
    pred = pred.reshape(pred.shape[0], pred.shape[1])
    # BUG FIX: reshape obs by its own dimensions (was pred.shape)
    obs = obs.reshape(obs.shape[0], obs.shape[1])
    inds = statError(obs, pred)
    # plot box, using the seaborn library
    keys = ["Bias", "RMSE", "NSE"]
    inds_test = subset_of_dict(inds, keys)
    box_fig = plot_diff_boxes(inds_test)
    box_fig.savefig(os.path.join(save_dir, "box_fig.png"))
    # plot ts
    show_me_num = 5
    t_s_dict = data_model_test.t_s_dict
    sites = np.array(t_s_dict["sites_id"])
    t_range = np.array(t_s_dict["t_final_range"])
    ts_fig = plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num)
    ts_fig.savefig(os.path.join(save_dir, "ts_fig.png"))
    # plot nse ecdf
    sites_df_nse = pd.DataFrame({
        "sites": sites,
        keys[2]: inds_test[keys[2]]
    })
    plot_ecdf(sites_df_nse, keys[2])
    # plot map
    gauge_dict = data_model_test.data_source.gage_dict
    plot_map(gauge_dict,
             sites_df_nse,
             id_col="STAID",
             lon_col="LNG_GAGE",
             lat_col="LAT_GAGE")
def split_results_to_regions(gages_data_model, epoch, id_regions_idx,
                             id_regions_sites_ids):
    """Split whole-dataset test results into per-region subsets.

    Loads the saved predictions/observations for the given epoch, slices
    them by each region's row indices, and computes per-region error
    metrics keyed by station id.

    Returns a tuple (preds, obss, inds_dfs): lists of per-region prediction
    arrays, observation arrays, and metric DataFrames, in region order.
    """
    temp_dir = gages_data_model.data_source.data_config.data_path['Temp']
    all_pred, all_obs = load_result(temp_dir, epoch)
    all_pred = all_pred.reshape(all_pred.shape[0], all_pred.shape[1])
    all_obs = all_obs.reshape(all_obs.shape[0], all_obs.shape[1])
    region_preds = []
    region_obss = []
    region_dfs = []
    for region_idx, site_ids in zip(id_regions_idx, id_regions_sites_ids):
        region_pred = all_pred[region_idx, :]
        region_obs = all_obs[region_idx, :]
        region_preds.append(region_pred)
        region_obss.append(region_obs)
        metrics = statError(region_obs, region_pred)
        metrics['STAID'] = site_ids
        # inds_df.to_csv(os.path.join(gages_data_model.data_source.data_config.data_path["Out"],
        #                regions_name[i] + "epoch" + str(epoch) + 'data_df.csv'))
        region_dfs.append(pd.DataFrame(metrics))
    return region_preds, region_obss, region_dfs
def test_export_result(self):
    """Export per-site test metrics (with station ids) to data_df.csv."""
    model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    sim, truth = load_result(
        model.data_source.data_config.data_path['Temp'], self.test_epoch)
    sim = sim.reshape(sim.shape[0], sim.shape[1])
    truth = truth.reshape(truth.shape[0], truth.shape[1])
    metrics = statError(truth, sim)
    metrics['STAID'] = model.t_s_dict["sites_id"]
    pd.DataFrame(metrics).to_csv('data_df.csv')
def test_plot_pretrained_model_test(self):
    """Show ecdf/map/box/ts plots for the pretrained model's test results."""
    data_model_test = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    save_dir = data_model_test.data_source.data_config.data_path['Temp']
    pred, obs = load_result(save_dir,
                            self.config_data.config_file.TEST_EPOCH)
    pred = pred.reshape(pred.shape[0], pred.shape[1])
    # BUG FIX: reshape obs by its own dimensions (was pred.shape)
    obs = obs.reshape(obs.shape[0], obs.shape[1])
    inds = statError(obs, pred)
    keys = ["Bias", "RMSE", "NSE"]
    t_s_dict = data_model_test.t_s_dict
    sites = np.array(t_s_dict["sites_id"])
    t_range = np.array(t_s_dict["t_final_range"])
    # plot nse ecdf
    sites_df_nse = pd.DataFrame({"sites": sites, keys[2]: inds[keys[2]]})
    plot_ecdf(sites_df_nse, keys[2])
    # plot map
    gauge_dict = data_model_test.data_source.gage_dict
    plot_map(gauge_dict,
             sites_df_nse,
             id_col="STAID",
             lon_col="LNG_GAGE",
             lat_col="LAT_GAGE")
    # plot box, using the seaborn library
    inds_test = subset_of_dict(inds, keys)
    box_fig = plot_diff_boxes(inds_test)
    # plot ts
    show_me_num = 5
    ts_fig = plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num)
    plt.show()
var_dict_file_name='test_dictAttribute_base.json', t_s_dict_file_name='test_dictTimeSpace_base.json') data_model = GagesModel.load_datamodel( config_data.data_path["Temp"], str(i), data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json', flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy', f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json', t_s_dict_file_name='test_dictTimeSpace.json') pred_base, obs_base = load_result( data_model_base.data_source.data_config.data_path['Temp'], test_epoch, pred_name='flow_pred_base', obs_name='flow_obs_base') pred_base = pred_base.reshape(pred_base.shape[0], pred_base.shape[1]) obs_base = obs_base.reshape(obs_base.shape[0], obs_base.shape[1]) hydro_logger.info("the size of %s %s %s Train-c %s", k, 0, i, pred_base.shape[0]) predsbase.append(pred_base) obssbase.append(obs_base) pred_i, obs_i = load_result( data_model.data_source.data_config.data_path['Temp'], test_epoch) pred_i = pred_i.reshape(pred_i.shape[0], pred_i.shape[1]) obs_i = obs_i.reshape(obs_i.shape[0], obs_i.shape[1])
def test_purposes_seperate(self):
    """Evaluate and plot test results separately for each dam main purpose.

    Groups sites by their dam's main purpose (from the NID dictionary),
    intersects each group with the modeled site list, then writes per-group
    metric CSVs and box/ts/ecdf/map figures.
    """
    quick_data_dir = os.path.join(self.config_data.data_path["DB"],
                                  "quickdata")
    data_dir = os.path.join(quick_data_dir,
                            "allnonref-dam_95-05_nan-0.1_00-1.0")
    data_model_test = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    data_model = GagesModel.update_data_model(self.config_data,
                                              data_model_test)
    # NID (dam inventory) metadata lives in a sibling directory of DB
    nid_dir = os.path.join(
        "/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid",
        "quickdata")
    gage_main_dam_purpose = unserialize_json(
        os.path.join(nid_dir, "dam_main_purpose_dict.json"))
    gage_main_dam_purpose_lst = list(gage_main_dam_purpose.values())
    gage_main_dam_purpose_unique = np.unique(gage_main_dam_purpose_lst)
    # build purpose -> sorted site-id list
    purpose_regions = {}
    for i in range(gage_main_dam_purpose_unique.size):
        sites_id = []
        for key, value in gage_main_dam_purpose.items():
            if value == gage_main_dam_purpose_unique[i]:
                sites_id.append(key)
        # site ids must be strictly increasing: np.intersect1d below returns
        # sorted results, so order must match for index alignment
        assert (all(x < y for x, y in zip(sites_id, sites_id[1:])))
        purpose_regions[gage_main_dam_purpose_unique[i]] = sites_id
    id_regions_idx = []
    id_regions_sites_ids = []
    df_id_region = np.array(data_model.t_s_dict["sites_id"])
    for key, value in purpose_regions.items():
        gages_id = value
        # rows of the prediction matrix belonging to this purpose group
        c, ind1, ind2 = np.intersect1d(df_id_region,
                                       gages_id,
                                       return_indices=True)
        assert (all(x < y for x, y in zip(ind1, ind1[1:])))
        assert (all(x < y for x, y in zip(c, c[1:])))
        id_regions_idx.append(ind1)
        id_regions_sites_ids.append(c)
    pred_all, obs_all = load_result(self.config_data.data_path["Temp"],
                                    self.test_epoch)
    pred_all = pred_all.reshape(pred_all.shape[0], pred_all.shape[1])
    obs_all = obs_all.reshape(obs_all.shape[0], obs_all.shape[1])
    # NOTE(review): loop starts at index 9 — presumably resuming a previously
    # interrupted run over the purpose groups; confirm before reuse
    for i in range(9, len(gage_main_dam_purpose_unique)):
        pred = pred_all[id_regions_idx[i], :]
        obs = obs_all[id_regions_idx[i], :]
        inds = statError(obs, pred)
        inds['STAID'] = id_regions_sites_ids[i]
        inds_df = pd.DataFrame(inds)
        inds_df.to_csv(
            os.path.join(
                self.config_data.data_path["Out"],
                gage_main_dam_purpose_unique[i] + "epoch" +
                str(self.test_epoch) + 'data_df.csv'))
        # plot box, using the seaborn library
        keys = ["Bias", "RMSE", "NSE"]
        inds_test = subset_of_dict(inds, keys)
        box_fig = plot_diff_boxes(inds_test)
        box_fig.savefig(
            os.path.join(
                self.config_data.data_path["Out"],
                gage_main_dam_purpose_unique[i] + "epoch" +
                str(self.test_epoch) + "box_fig.png"))
        # plot ts
        sites = np.array(df_id_region[id_regions_idx[i]])
        t_range = np.array(data_model.t_s_dict["t_final_range"])
        show_me_num = 1
        ts_fig = plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num)
        ts_fig.savefig(
            os.path.join(
                self.config_data.data_path["Out"],
                gage_main_dam_purpose_unique[i] + "epoch" +
                str(self.test_epoch) + "ts_fig.png"))
        # plot nse ecdf
        sites_df_nse = pd.DataFrame({
            "sites": sites,
            keys[2]: inds_test[keys[2]]
        })
        plot_ecdf(
            sites_df_nse, keys[2],
            os.path.join(
                self.config_data.data_path["Out"],
                gage_main_dam_purpose_unique[i] + "epoch" +
                str(self.test_epoch) + "ecdf_fig.png"))
        # plot map
        gauge_dict = data_model.data_source.gage_dict
        save_map_file = os.path.join(
            self.config_data.data_path["Out"],
            gage_main_dam_purpose_unique[i] + "epoch" +
            str(self.test_epoch) + "map_fig.png")
        plot_map(gauge_dict,
                 sites_df_nse,
                 save_file=save_map_file,
                 id_col="STAID",
                 lon_col="LNG_GAGE",
                 lat_col="LAT_GAGE")
def test_plot_each_symmetric_exp(self):
    """Box-plot PUB metrics of symmetric experiments grouped by train/test set.

    For each symmetric experiment, stacks the per-split results of the base
    model and the two PUB test sets, computes metrics, and builds a long-form
    DataFrame (train set, test set, metric) for a grouped seaborn box plot.
    """
    train_set = self.train_set
    test_set = self.test_set
    show_ind_key = self.show_ind_key
    test_epoch = self.test_epoch
    split_num = self.split_num
    exp_lst = self.symmetric_exp_lst
    train_data_name_lst = self.symmetric_train_data_name_lst
    test_data_name_lst = self.symmetric_test_data_name_lst
    colors = "Greens"
    sns.set(font_scale=1)
    fig = plt.figure()
    # NOTE(review): add_axes() is called without a rect argument — confirm
    # this returns a usable Axes on the matplotlib version in use
    ax_k = fig.add_axes()
    frames = []
    for j in range(len(exp_lst)):
        config_data = load_dataconfig_case_exp(cfg, exp_lst[j])
        preds = []
        obss = []
        preds2 = []
        obss2 = []
        predsbase = []
        obssbase = []
        for i in range(split_num):
            # base model results for split i
            data_model_base = GagesModel.load_datamodel(
                config_data.data_path["Temp"], str(i),
                data_source_file_name='test_data_source_base.txt',
                stat_file_name='test_Statistics_base.json',
                flow_file_name='test_flow_base.npy',
                forcing_file_name='test_forcing_base.npy',
                attr_file_name='test_attr_base.npy',
                f_dict_file_name='test_dictFactorize_base.json',
                var_dict_file_name='test_dictAttribute_base.json',
                t_s_dict_file_name='test_dictTimeSpace_base.json')
            # first PUB test set for split i
            data_model = GagesModel.load_datamodel(
                config_data.data_path["Temp"], str(i),
                data_source_file_name='test_data_source.txt',
                stat_file_name='test_Statistics.json',
                flow_file_name='test_flow.npy',
                forcing_file_name='test_forcing.npy',
                attr_file_name='test_attr.npy',
                f_dict_file_name='test_dictFactorize.json',
                var_dict_file_name='test_dictAttribute.json',
                t_s_dict_file_name='test_dictTimeSpace.json')
            # second PUB test set for split i
            data_model_2 = GagesModel.load_datamodel(
                config_data.data_path["Temp"], str(i),
                data_source_file_name='test_data_source_2.txt',
                stat_file_name='test_Statistics_2.json',
                flow_file_name='test_flow_2.npy',
                forcing_file_name='test_forcing_2.npy',
                attr_file_name='test_attr_2.npy',
                f_dict_file_name='test_dictFactorize_2.json',
                var_dict_file_name='test_dictAttribute_2.json',
                t_s_dict_file_name='test_dictTimeSpace_2.json')
            pred_base, obs_base = load_result(
                data_model_base.data_source.data_config.data_path['Temp'],
                test_epoch,
                pred_name='flow_pred_base',
                obs_name='flow_obs_base')
            pred_base = pred_base.reshape(pred_base.shape[0],
                                          pred_base.shape[1])
            obs_base = obs_base.reshape(obs_base.shape[0], obs_base.shape[1])
            hydro_logger.info("the size of %s %s Train-base %s", j, i,
                              pred_base.shape[0])
            predsbase.append(pred_base)
            obssbase.append(obs_base)
            pred_i, obs_i = load_result(
                data_model.data_source.data_config.data_path['Temp'],
                test_epoch)
            pred_i = pred_i.reshape(pred_i.shape[0], pred_i.shape[1])
            obs_i = obs_i.reshape(obs_i.shape[0], obs_i.shape[1])
            hydro_logger.info("the size of %s %s PUB-1 %s", j, i,
                              pred_i.shape[0])
            preds.append(pred_i)
            obss.append(obs_i)
            pred_2, obs_2 = load_result(
                data_model_2.data_source.data_config.data_path['Temp'],
                test_epoch,
                pred_name='flow_pred_2',
                obs_name='flow_obs_2')
            pred_2 = pred_2.reshape(pred_2.shape[0], pred_2.shape[1])
            obs_2 = obs_2.reshape(obs_2.shape[0], obs_2.shape[1])
            hydro_logger.info("the size of %s %s PUB-2 %s", j, i,
                              pred_2.shape[0])
            preds2.append(pred_2)
            obss2.append(obs_2)
        # stack the per-split arrays row-wise before computing metrics
        predsbase_np = reduce(lambda a, b: np.vstack((a, b)), predsbase)
        obssbase_np = reduce(lambda a, b: np.vstack((a, b)), obssbase)
        indsbase = statError(obssbase_np, predsbase_np)
        inds_df_abase = pd.DataFrame(indsbase)
        preds_np = reduce(lambda a, b: np.vstack((a, b)), preds)
        obss_np = reduce(lambda a, b: np.vstack((a, b)), obss)
        inds = statError(obss_np, preds_np)
        inds_df_a = pd.DataFrame(inds)
        preds2_np = reduce(lambda a, b: np.vstack((a, b)), preds2)
        obss2_np = reduce(lambda a, b: np.vstack((a, b)), obss2)
        inds2 = statError(obss2_np, preds2_np)
        inds_df_a2 = pd.DataFrame(inds2)
        # build long-form frames: one row per site with its train/test labels;
        # the test-set label index depends on which experiment this is
        if j == 0 or j == 1:
            df_abase = pd.DataFrame({
                train_set:
                np.full([inds_df_abase.shape[0]], train_data_name_lst[j]),
                test_set:
                np.full([inds_df_abase.shape[0]], test_data_name_lst[j]),
                show_ind_key:
                inds_df_abase[show_ind_key]
            })
            frames.append(df_abase)
        # NOTE(review): the label indices [2]/[3] swap between the j == 1 case
        # and the others — confirm against the experiment naming convention
        if j == 1:
            df_a = pd.DataFrame({
                train_set:
                np.full([inds_df_a.shape[0]], train_data_name_lst[j]),
                test_set:
                np.full([inds_df_a.shape[0]], test_data_name_lst[3]),
                show_ind_key:
                inds_df_a[show_ind_key]
            })
            df_a2 = pd.DataFrame({
                train_set:
                np.full([inds_df_a2.shape[0]], train_data_name_lst[j]),
                test_set:
                np.full([inds_df_a2.shape[0]], test_data_name_lst[2]),
                show_ind_key:
                inds_df_a2[show_ind_key]
            })
        else:
            df_a = pd.DataFrame({
                train_set:
                np.full([inds_df_a.shape[0]], train_data_name_lst[j]),
                test_set:
                np.full([inds_df_a.shape[0]], test_data_name_lst[2]),
                show_ind_key:
                inds_df_a[show_ind_key]
            })
            df_a2 = pd.DataFrame({
                train_set:
                np.full([inds_df_a2.shape[0]], train_data_name_lst[j]),
                test_set:
                np.full([inds_df_a2.shape[0]], test_data_name_lst[3]),
                show_ind_key:
                inds_df_a2[show_ind_key]
            })
        frames.append(df_a)
        frames.append(df_a2)
    result = pd.concat(frames)
    sns_box = sns.boxplot(
        ax=ax_k,
        x=train_set,
        y=show_ind_key,
        hue=test_set,
        # hue_order=test_data_name_lst,
        data=result,
        showfliers=False,
        palette=colors)  # , width=0.8
    # log group medians in plot order for cross-checking the figure
    medians = result.groupby([train_set, test_set],
                             sort=False)[show_ind_key].median().values
    hydro_logger.info(medians)
    create_median_labels(sns_box.axes, has_fliers=False)
    sns.despine()
    plt.tight_layout()
    plt.show()
    hydro_logger.debug("plot successfully")
def test_stor_seperate(self):
    """Compare NSE ECDFs of small- vs large-storage basins.

    Draws four curves: the two storage groups evaluated within one all-basin
    model ("together") and the two separately trained models ("separate").
    """
    config_dir = definitions.CONFIG_DIR
    config_file = os.path.join(config_dir, "basic/config_exp18.ini")
    subdir = r"basic/exp18"
    config_data = GagesConfig.set_subdir(config_file, subdir)
    data_model = GagesModel.load_datamodel(
        config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    all_sites = data_model.t_s_dict["sites_id"]
    # split basins by normal storage: [0, 50) vs [50, 15000)
    storage_nor_1 = [0, 50]
    storage_nor_2 = [50, 15000]  # max is 14348.6581036888
    source_data_nor1 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        STORAGE=storage_nor_1)
    source_data_nor2 = GagesSource.choose_some_basins(
        config_data,
        config_data.model_dict["data"]["tRangeTrain"],
        STORAGE=storage_nor_2)
    sites_id_nor1 = source_data_nor1.all_configs['flow_screen_gage_id']
    sites_id_nor2 = source_data_nor2.all_configs['flow_screen_gage_id']
    # row indices of each storage group within the all-basin model
    idx_lst_nor1 = [
        i for i in range(len(all_sites)) if all_sites[i] in sites_id_nor1
    ]
    idx_lst_nor2 = [
        i for i in range(len(all_sites)) if all_sites[i] in sites_id_nor2
    ]
    pred, obs = load_result(
        data_model.data_source.data_config.data_path['Temp'],
        self.test_epoch)
    pred = pred.reshape(pred.shape[0], pred.shape[1])
    # NOTE(review): obs is reshaped with pred's dims — works only when both
    # arrays already share a shape; probably should use obs.shape
    obs = obs.reshape(pred.shape[0], pred.shape[1])
    inds = statError(obs, pred)
    inds_df = pd.DataFrame(inds)
    keys_nse = "NSE"
    xs = []
    ys = []
    cases_exps_legends_together = ["small_stor", "large_stor"]
    x1, y1 = ecdf(inds_df[keys_nse].iloc[idx_lst_nor1])
    xs.append(x1)
    ys.append(y1)
    x2, y2 = ecdf(inds_df[keys_nse].iloc[idx_lst_nor2])
    xs.append(x2)
    ys.append(y2)
    # separately trained models for the two storage groups
    cases_exps = ["dam_exp12", "dam_exp11"]
    cases_exps_legends_separate = ["small_stor", "large_stor"]
    # cases_exps = ["dam_exp4", "dam_exp5", "dam_exp6"]
    # cases_exps = ["dam_exp1", "dam_exp2", "dam_exp3"]
    # cases_exps_legends = ["dam-lstm", "dam-with-natural-flow", "dam-with-kernel"]
    for case_exp in cases_exps:
        # NOTE(review): other methods pass `cfg` as the first argument to
        # load_dataconfig_case_exp — confirm which signature is current
        config_data_i = load_dataconfig_case_exp(case_exp)
        pred_i, obs_i = load_result(config_data_i.data_path['Temp'],
                                    self.test_epoch)
        pred_i = pred_i.reshape(pred_i.shape[0], pred_i.shape[1])
        obs_i = obs_i.reshape(obs_i.shape[0], obs_i.shape[1])
        inds_i = statError(obs_i, pred_i)
        x, y = ecdf(inds_i[keys_nse])
        xs.append(x)
        ys.append(y)
    plot_ecdfs(xs,
               ys,
               cases_exps_legends_together + cases_exps_legends_separate,
               style=["together", "together", "separate", "separate"])