def test_export_result(self):
    """Export the error statistics of the saved test results as a CSV file.

    Loads the cached test data model from the "Temp" directory, reads
    ``flow_pred.npy`` and ``flow_obs.npy`` from the data model's own "Temp"
    directory, evaluates them with ``statError`` and writes the statistics,
    together with the site ids, to ``data_df.csv`` in the "Out" directory.
    """
    # File names of the separately serialized pieces of the test data model.
    cache_names = {
        'data_source_file_name': 'test_data_source.txt',
        'stat_file_name': 'test_Statistics.json',
        'flow_file_name': 'test_flow.npy',
        'forcing_file_name': 'test_forcing.npy',
        'attr_file_name': 'test_attr.npy',
        'f_dict_file_name': 'test_dictFactorize.json',
        'var_dict_file_name': 'test_dictAttribute.json',
        't_s_dict_file_name': 'test_dictTimeSpace.json',
    }
    data_model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"], **cache_names)

    def keep_first_two_dims(arr):
        # Reshape to (size of axis 0, size of axis 1); presumably this drops
        # a trailing singleton axis -- TODO confirm against the saved arrays.
        return arr.reshape(arr.shape[0], arr.shape[1])

    result_dir = data_model.data_source.data_config.data_path['Temp']
    pred_path = os.path.join(result_dir, 'flow_pred.npy')
    obs_path = os.path.join(result_dir, 'flow_obs.npy')
    pred = keep_first_two_dims(unserialize_numpy(pred_path))
    obs = keep_first_two_dims(unserialize_numpy(obs_path))

    # Attach the site ids so every row of the exported table is identifiable.
    stats = statError(obs, pred)
    stats['STAID'] = data_model.t_s_dict["sites_id"]
    csv_path = os.path.join(self.config_data.data_path["Out"], 'data_df.csv')
    pd.DataFrame(stats).to_csv(csv_path)
def load_datamodel(cls, dir_temp_orgin, num_str=None, **kwargs):
    """Rebuild a data model from its separately serialized pieces.

    The data model is stored piecewise (dicts as JSON, arrays as ``.npy``)
    because serializing the whole object at once is slow.

    Args:
        dir_temp_orgin: directory that holds the cached files.
        num_str: optional sub-directory name; when truthy, the files are read
            from ``dir_temp_orgin/num_str`` instead of ``dir_temp_orgin``.
        **kwargs: file names of the cached pieces. Required keys:
            ``data_source_file_name``, ``stat_file_name``, ``flow_file_name``,
            ``forcing_file_name``, ``attr_file_name``, ``f_dict_file_name``,
            ``var_dict_file_name`` and ``t_s_dict_file_name``.

    Returns:
        An instance of ``cls`` built from the loaded pieces.

    Raises:
        KeyError: if one of the required file-name keys is missing.
    """
    dir_temp = os.path.join(dir_temp_orgin, num_str) if num_str else dir_temp_orgin

    # Resolve every path up front so a missing key fails before any file I/O.
    required_keys = (
        'data_source_file_name',
        'stat_file_name',
        'flow_file_name',
        'forcing_file_name',
        'attr_file_name',
        'f_dict_file_name',
        'var_dict_file_name',
        't_s_dict_file_name',
    )
    paths = {key: os.path.join(dir_temp, kwargs[key]) for key in required_keys}

    source_data = unserialize_pickle(paths['data_source_file_name'])
    stat_dict = unserialize_json(paths['stat_file_name'])
    data_flow = unserialize_numpy(paths['flow_file_name'])
    data_forcing = unserialize_numpy(paths['forcing_file_name'])
    data_attr = unserialize_numpy(paths['attr_file_name'])
    # dictFactorize.json explains the values of the categorical variables.
    var_dict = unserialize_json(paths['var_dict_file_name'])
    f_dict = unserialize_json(paths['f_dict_file_name'])
    t_s_dict = unserialize_json(paths['t_s_dict_file_name'])

    return cls(source_data, data_flow, data_forcing, data_attr,
               var_dict, f_dict, stat_dict, t_s_dict)
def test_inv_plot(self):
    """Export the error statistics and draw the evaluation plots.

    Loads the cached test data model from sub-directory ``"2"`` of the
    "Temp" directory of ``config_data_2``, reads the predictions and
    observations saved for ``self.test_epoch``, writes the ``statError``
    results to ``data_df.csv`` and then produces a box plot, a time-series
    plot, an ECDF of NSE and a map. The box and time-series figures are
    saved into the "Out" directory of ``config_data_2``.
    """
    cache_names = {
        'data_source_file_name': 'test_data_source.txt',
        'stat_file_name': 'test_Statistics.json',
        'flow_file_name': 'test_flow.npy',
        'forcing_file_name': 'test_forcing.npy',
        'attr_file_name': 'test_attr.npy',
        'f_dict_file_name': 'test_dictFactorize.json',
        'var_dict_file_name': 'test_dictAttribute.json',
        't_s_dict_file_name': 'test_dictTimeSpace.json',
    }
    data_model = GagesModel.load_datamodel(
        self.config_data_2.data_path["Temp"], "2", **cache_names)

    # Result files are named "epoch<N>flow_pred.npy" / "epoch<N>flow_obs.npy".
    result_dir = data_model.data_source.data_config.data_path['Temp']
    pred_path = os.path.join(
        result_dir, 'epoch' + str(self.test_epoch) + 'flow_pred.npy')
    obs_path = os.path.join(
        result_dir, 'epoch' + str(self.test_epoch) + 'flow_obs.npy')
    raw_pred = unserialize_numpy(pred_path)
    raw_obs = unserialize_numpy(obs_path)
    pred = raw_pred.reshape(raw_pred.shape[0], raw_pred.shape[1])
    obs = raw_obs.reshape(raw_obs.shape[0], raw_obs.shape[1])

    inds = statError(obs, pred)
    inds['STAID'] = data_model.t_s_dict["sites_id"]
    out_dir = self.config_data_2.data_path["Out"]
    pd.DataFrame(inds).to_csv(os.path.join(out_dir, 'data_df.csv'))

    # Box plot of the selected statistics (drawn with the seaborn library).
    keys = ["Bias", "RMSE", "NSE"]
    nse_key = keys[2]
    inds_test = subset_of_dict(inds, keys)
    plot_diff_boxes(inds_test).savefig(os.path.join(out_dir, "box_fig.png"))

    # Time-series plot for a handful of sites.
    show_me_num = 5
    t_s_dict = data_model.t_s_dict
    sites = np.array(t_s_dict["sites_id"])
    t_range = np.array(t_s_dict["t_final_range"])
    # NOTE(review): seqLength is read from config_data_1 while everything
    # else in this test uses config_data_2 -- confirm this is intended.
    time_seq_length = self.config_data_1.model_dict['model']['seqLength']
    # Move the start of the plotted range forward by (seqLength - 1) days.
    shifted_start = np.datetime64(t_range[0]) + np.timedelta64(
        time_seq_length - 1, 'D')
    t_range[0] = np.datetime_as_string(shifted_start, unit='D')
    plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num).savefig(
        os.path.join(out_dir, "ts_fig.png"))

    # ECDF of NSE over all sites.
    sites_df_nse = pd.DataFrame({"sites": sites, nse_key: inds_test[nse_key]})
    plot_ecdf(sites_df_nse, nse_key)

    # Map of the gauges.
    plot_map(data_model.data_source.gage_dict,
             sites_df_nse,
             id_col="STAID",
             lon_col="LNG_GAGE",
             lat_col="LAT_GAGE")
def load_result(save_dir, epoch, pred_name='flow_pred', obs_name='flow_obs'):
    """Load the predicted and observed values saved for one epoch.

    The files are expected at ``save_dir/epoch<epoch><name>.npy``.

    Args:
        save_dir: directory that contains the saved arrays.
        epoch: epoch number used in the file-name prefix.
        pred_name: base name of the prediction file (without ``.npy``).
        obs_name: base name of the observation file (without ``.npy``).

    Returns:
        A ``(pred, obs)`` tuple as returned by ``unserialize_numpy``.
    """
    epoch_prefix = 'epoch' + str(epoch)
    pred_path = os.path.join(save_dir, epoch_prefix + pred_name + '.npy')
    obs_path = os.path.join(save_dir, epoch_prefix + obs_name + '.npy')
    predictions = unserialize_numpy(pred_path)
    observations = unserialize_numpy(obs_path)
    return predictions, observations
def test_plot_1by1(self):
    """Merge the per-region experiment results and plot them together.

    For every configured region ``i`` (1-based) the cached test data model
    and the saved ``flow_pred.npy`` / ``flow_obs.npy`` arrays are read from
    the ``exp<i>`` directory located next to the configured "Temp"
    directory. The per-region arrays are stacked along the first axis, the
    per-region data models are merged with ``GagesModel.compact_data_model``
    and the combined result is handed to ``plot_we_need``.

    At least one region must be configured; stacking an empty list fails.
    """
    data_config = self.config_data.read_data_config()
    regions = data_config["regions"]

    # File names of the separately serialized pieces of a test data model.
    cache_names = {
        'data_source_file_name': 'test_data_source.txt',
        'stat_file_name': 'test_Statistics.json',
        'flow_file_name': 'test_flow.npy',
        'forcing_file_name': 'test_forcing.npy',
        'attr_file_name': 'test_attr.npy',
        'f_dict_file_name': 'test_dictFactorize.json',
        'var_dict_file_name': 'test_dictAttribute.json',
        't_s_dict_file_name': 'test_dictTimeSpace.json',
    }

    # The parent of the "Temp" directory does not depend on the region, so
    # it is computed once; os.path.dirname also copes with OS-specific
    # separators, unlike splitting on '/'.
    exp_root = os.path.dirname(self.config_data.data_path['Temp'])

    data_model_test_lst = []
    obs_lsts = []
    pred_lsts = []
    for i in range(1, len(regions) + 1):
        data_dir_i = os.path.join(exp_root, "exp" + str(i))
        data_model_test_lst.append(
            GagesModel.load_datamodel(data_dir_i, **cache_names))
        preds = unserialize_numpy(os.path.join(data_dir_i, 'flow_pred.npy'))
        obss = unserialize_numpy(os.path.join(data_dir_i, 'flow_obs.npy'))
        obs_lsts.append(obss)
        pred_lsts.append(preds)

    # Stack all regions in one call instead of re-copying the accumulated
    # array once per region.
    obs_final = np.vstack(obs_lsts)
    pred_final = np.vstack(pred_lsts)

    gages_model_test = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"], **cache_names)
    data_model_test = GagesModel.compact_data_model(
        data_model_test_lst, gages_model_test.data_source)
    plot_we_need(data_model_test,
                 obs_final,
                 pred_final,
                 id_col="STAID",
                 lon_col="LNG_GAGE",
                 lat_col="LAT_GAGE")
def setUp(self):
    """Load the saved results of one epoch and compute their NSE statistics.

    Reads ``epoch<N>flow_pred.npy`` and ``epoch<N>flow_obs.npy`` from
    ``self.dir_temp`` (which must already be set; it is not assigned here),
    reshapes both arrays to two dimensions and stores the ``statError``
    results.

    Sets ``self.test_epoch``, ``self.pred``, ``self.obs``, ``self.inds``,
    ``self.keys`` and ``self.inds_test``.
    """
    self.test_epoch = 20
    epoch_prefix = "epoch" + str(self.test_epoch)
    flow_pred_file = os.path.join(self.dir_temp,
                                  epoch_prefix + 'flow_pred.npy')
    flow_obs_file = os.path.join(self.dir_temp,
                                 epoch_prefix + 'flow_obs.npy')
    pred = unserialize_numpy(flow_pred_file)
    obs = unserialize_numpy(flow_obs_file)
    self.pred = pred.reshape(pred.shape[0], pred.shape[1])
    # Reshape the observations by their OWN dimensions. Using pred's shape
    # here could silently rearrange obs whenever the two arrays differ in
    # shape but not in size.
    self.obs = obs.reshape(obs.shape[0], obs.shape[1])
    # Compute the performance statistics.
    self.inds = statError(self.obs, self.pred)
    self.keys = ["NSE"]
    self.inds_test = subset_of_dict(self.inds, self.keys)
def test_daymet_avg_from_diff(self):
    """Compare three daily ``tmax`` series for basin 01013500 and plot them.

    The three sources are a locally computed ``.npy`` file ("myself"), the
    CAMELS daymet forcing text file ("camels") and a CSV file ("gee").
    ``statError1d`` is printed for every pair of sources, then two line
    plots are shown: myself vs. camels and camels vs. gee.
    """
    own_tmax = unserialize_numpy(
        os.path.join(self.netcdf_dir,
                     "daymet_01013500_mean_2000_myself.npy"))

    camels_path = os.path.join(
        self.dir_db,
        "basin_timeseries_v1p2_metForcing_obsFlow/"
        "basin_dataset_public_v1p2/basin_mean_forcing/daymet/01/"
        "01013500_lump_cida_forcing_leap.txt")
    camels_table = pd.read_csv(camels_path,
                               sep=r'\s+',
                               header=None,
                               skiprows=4)
    # Column 8, rows 7305..7669 (365 values); presumably the tmax column
    # restricted to the year 2000 -- TODO confirm against the file layout.
    camels_tmax = camels_table[8].values[7305:7670]

    gee_table = pd.read_csv(
        os.path.join(self.netcdf_dir, "daymet_01013500_mean_2000.csv"))
    gee_tmax = gee_table["tmax"].values

    print()
    compared_pairs = (
        (own_tmax, camels_tmax),
        (own_tmax, gee_tmax),
        (camels_tmax, gee_tmax),
    )
    for first, second in compared_pairs:
        print("Bias, RMSE, NSE", statError1d(first, second))

    import matplotlib.pyplot as plt
    import seaborn as sns
    sns.set(style="whitegrid")

    # One column per source, one row per day.
    values = np.array([own_tmax, camels_tmax, gee_tmax]).T
    print(values)
    dates = pd.date_range("1 1 2000", periods=365, freq="D")
    data = pd.DataFrame(values, dates, columns=["myself", "camels", "gee"])
    print(data)

    # First figure: myself vs. camels; second figure: camels vs. gee.
    for column_window in (slice(0, 2), slice(1, 3)):
        sns.lineplot(data=data.iloc[:, column_window],
                     palette="tab10",
                     linewidth=2.5)
        plt.show()
def test_forecast(self):
    """Run the model on the cached test data and save its output.

    Rebuilds a ``DataModel`` from the files referenced by the test
    instance, calls ``hydroDL.master_test`` on it, prints the returned
    predictions and observations and serializes both to
    ``self.flow_pred_file`` and ``self.flow_obs_file``.
    """
    source_data = unserialize_pickle(self.data_source_test_file)
    # The data model is stored piecewise because serializing it directly is
    # slow: the dicts are serialized straight to JSON files and the data
    # separately (the original note mentions HDF5; ``.npy`` files are read
    # here).
    stat_dict = unserialize_json(self.stat_file)
    # The generators are consumed lazily by the unpacking, so every file is
    # looked up and read one after another in the listed order.
    data_flow, data_forcing, data_attr = (
        unserialize_numpy(getattr(self, attr_name))
        for attr_name in ("flow_npy_file", "forcing_npy_file",
                          "attr_npy_file"))
    # dictFactorize.json explains the values of the categorical variables.
    var_dict, f_dict, t_s_dict = (
        unserialize_json(getattr(self, attr_name))
        for attr_name in ("var_dict_file", "f_dict_file", "t_s_dict_file"))

    data_model_test = DataModel(source_data, data_flow, data_forcing,
                                data_attr, var_dict, f_dict, stat_dict,
                                t_s_dict)
    pred, obs = hydroDL.master_test(data_model_test)
    for result in (pred, obs):
        print(result)
    serialize_numpy(pred, self.flow_pred_file)
    serialize_numpy(obs, self.flow_obs_file)
def test_regions_seperate(self):
    """Export the error statistics separately for every region.

    The sites of the cached test data model are matched against the
    ``GAGE_ID`` column of each region shapefile. For every region the
    matching rows of the saved predictions and observations are evaluated
    with ``statError`` and written to
    ``<region>epoch<test_epoch>data_df.csv`` in the "Out" directory.
    """

    def strictly_increasing(seq):
        # True when every element is smaller than its successor.
        return all(x < y for x, y in zip(seq, seq[1:]))

    data_model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt',
        stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy',
        forcing_file_name='test_forcing.npy',
        attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json',
        var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')

    gage_region_dir = data_model.data_source.all_configs.get(
        "gage_region_dir")
    region_shapefiles = data_model.data_source.all_configs.get("regions")
    shapefiles = [
        os.path.join(gage_region_dir, region_name + '.shp')
        for region_name in region_shapefiles
    ]

    # The row selection below relies on the site ids being sorted.
    df_id_region = np.array(data_model.t_s_dict["sites_id"])
    assert strictly_increasing(df_id_region)

    # Per region: row indices into the site list and the matching site ids.
    id_regions_idx = []
    id_regions_sites_ids = []
    for shapefile in shapefiles:
        gages_id = gpd.read_file(shapefile)['GAGE_ID'].values
        common_ids, site_rows, _ = np.intersect1d(df_id_region,
                                                  gages_id,
                                                  return_indices=True)
        assert strictly_increasing(site_rows)
        assert strictly_increasing(common_ids)
        id_regions_idx.append(site_rows)
        id_regions_sites_ids.append(common_ids)

    result_dir = data_model.data_source.data_config.data_path['Temp']
    pred_path = os.path.join(result_dir, 'flow_pred.npy')
    obs_path = os.path.join(result_dir, 'flow_obs.npy')
    pred_all = unserialize_numpy(pred_path)
    obs_all = unserialize_numpy(obs_path)
    pred_all = pred_all.reshape(pred_all.shape[0], pred_all.shape[1])
    obs_all = obs_all.reshape(obs_all.shape[0], obs_all.shape[1])

    per_region = zip(region_shapefiles, id_regions_idx, id_regions_sites_ids)
    for region_name, site_rows, site_ids in per_region:
        region_pred = pred_all[site_rows, :]
        region_obs = obs_all[site_rows, :]
        inds = statError(region_obs, region_pred)
        inds['STAID'] = site_ids
        csv_name = (region_name + "epoch" + str(self.test_epoch) +
                    'data_df.csv')
        pd.DataFrame(inds).to_csv(
            os.path.join(self.config_data.data_path["Out"], csv_name))