def test_train_julian(self):
    with torch.cuda.device(1):
        data_model1 = GagesModel.load_datamodel(
            self.config_data_natflow.data_path["Temp"], "1",
            data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
            flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model1.update_model_param('train', nEpoch=300)
        data_model2 = GagesModel.load_datamodel(
            self.config_data_lstm.data_path["Temp"], "2",
            data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
            flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model = GagesJulianDataModel(data_model1, data_model2)
        pre_trained_model_epoch = 270
        master_train_natural_flow(data_model, pre_trained_model_epoch)
def test_gages_data_model_quickdata(self):
    quick_data_dir = os.path.join(self.config_data.data_path["DB"], "quickdata")
    data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0")
    data_model_train = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
        flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    data_model_test = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    gages_model_train = GagesModel.update_data_model(self.config_data, data_model_train,
                                                     data_attr_update=True, screen_basin_area_huc4=False)
    gages_model_test = GagesModel.update_data_model(self.config_data, data_model_test,
                                                    data_attr_update=True,
                                                    train_stat_dict=gages_model_train.stat_dict,
                                                    screen_basin_area_huc4=False)
    save_datamodel(gages_model_train,
                   data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
                   flow_file_name='flow', forcing_file_name='forcing', attr_file_name='attr',
                   f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
                   t_s_dict_file_name='dictTimeSpace.json')
    save_datamodel(gages_model_test,
                   data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow', forcing_file_name='test_forcing', attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    print("read and save data model")
def test_inv_train(self):
    with torch.cuda.device(2):
        df1 = GagesModel.load_datamodel(
            self.config_data_1.data_path["Temp"], "1",
            data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
            flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        df2 = GagesModel.load_datamodel(
            self.config_data_2.data_path["Temp"], "2",
            data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
            flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model = GagesInvDataModel(df1, df2)
        pre_trained_model_epoch = 285
        # train_lstm_inv(data_model)
        train_lstm_inv(data_model, pre_trained_model_epoch=pre_trained_model_epoch)
def test_test_gages_sim(self):
    with torch.cuda.device(1):
        data_model1 = GagesModel.load_datamodel(
            self.config_data_natflow.data_path["Temp"], "1",
            data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        data_model1.update_model_param('train', nEpoch=300)
        data_model2 = GagesModel.load_datamodel(
            self.config_data_lstm.data_path["Temp"], "2",
            data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        model_input = GagesSimDataModel(data_model1, data_model2)
        pred, obs = master_test_natural_flow(model_input, epoch=self.test_epoch)
        basin_area = model_input.data_model2.data_source.read_attr(
            model_input.data_model2.t_s_dict["sites_id"], ['DRAIN_SQKM'], is_return_dict=False)
        mean_prep = model_input.data_model2.data_source.read_attr(
            model_input.data_model2.t_s_dict["sites_id"], ['PPTAVG_BASIN'], is_return_dict=False)
        # PPTAVG_BASIN is a mean annual precipitation attribute; presumably converted here to a daily value in mm
        mean_prep = mean_prep / 365 * 10
        pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
        obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
        save_result(model_input.data_model2.data_source.data_config.data_path['Temp'],
                    str(self.test_epoch), pred, obs)
def test_data_model(self):
    quick_data_dir = os.path.join(self.config_data.data_path["DB"], "quickdata")
    data_dir = os.path.join(quick_data_dir, "conus_85-05_nan-0.1_00-1.0")
    data_model_test = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    gages_model_test = GagesModel.update_data_model(self.config_data, data_model_test)
    save_datamodel(gages_model_test,
                   data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow', forcing_file_name='test_forcing', attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
    print("read and save data model")
def test_train_gages_sim(self):
    with torch.cuda.device(1):
        # load model from npy data and then update some params for the test func
        data_model1 = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"], "1",
            data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
            flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model1.update_model_param('train', nEpoch=300)
        data_model2 = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"], "2",
            data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
            flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        data_model = GagesSimDataModel(data_model1, data_model2)
        pre_trained_model_epoch = 150
        master_train_natural_flow(data_model, pre_trained_model_epoch)
def test_dam_train(self):
    with torch.cuda.device(0):
        quick_data_dir = os.path.join(self.config_data.data_path["DB"], "quickdata")
        data_dir = os.path.join(quick_data_dir, "allnonref_85-05_nan-0.1_00-1.0")
        data_model_8595 = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
            flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        gages_model_train = GagesModel.update_data_model(self.config_data, data_model_8595)
        nid_dir = os.path.join("/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid", "quickdata")
        nid_input = NidModel.load_nidmodel(nid_dir, nid_file=self.nid_file,
                                           nid_source_file_name='nid_source.txt',
                                           nid_data_file_name='nid_data.shp')
        gage_main_dam_purpose = unserialize_json(os.path.join(nid_dir, "dam_main_purpose_dict.json"))
        data_input = GagesDamDataModel(gages_model_train, nid_input, True, gage_main_dam_purpose)
        gages_input = choose_which_purpose(data_input)
        master_train(gages_input)
def test_test_gages(self):
    data_model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    with torch.cuda.device(1):
        data_models = GagesModel.every_model(data_model)
        obs_lst = []
        pred_lst = []
        for i in range(len(data_models)):
            print("\n", "Testing model", str(i + 1), ":\n")
            pred, obs = master_test(data_models[i])
            basin_area = data_models[i].data_source.read_attr(data_models[i].t_s_dict["sites_id"],
                                                              ['area_gages2'], is_return_dict=False)
            mean_prep = data_models[i].data_source.read_attr(data_models[i].t_s_dict["sites_id"],
                                                             ['p_mean'], is_return_dict=False)
            pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
            obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
            obs_lst.append(obs.flatten())
            pred_lst.append(pred.flatten())
        preds = np.array(pred_lst)
        obss = np.array(obs_lst)
        plot_we_need(data_model, obss, preds, id_col="id", lon_col="lon", lat_col="lat")
def test_storage_test(self):
    with torch.cuda.device(0):
        df1 = GagesModel.load_datamodel(
            self.config_data_natflow.data_path["Temp"], "1",
            data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        df1.update_model_param('train', nEpoch=300)
        df2 = GagesModel.load_datamodel(
            self.config_data_storage.data_path["Temp"], "2",
            data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        data_model = GagesStorageDataModel(df1, df2)
        test_epoch = self.test_epoch
        pred, obs = test_lstm_storage(data_model, epoch=test_epoch)
        basin_area = df2.data_source.read_attr(df2.t_s_dict["sites_id"], ['DRAIN_SQKM'], is_return_dict=False)
        mean_prep = df2.data_source.read_attr(df2.t_s_dict["sites_id"], ['PPTAVG_BASIN'], is_return_dict=False)
        mean_prep = mean_prep / 365 * 10
        pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
        obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
        save_result(df2.data_source.data_config.data_path['Temp'], test_epoch, pred, obs)
def test_compact_data_model(self):
    data_dir_temp = '/'.join(self.config_data.data_path['Temp'].split('/')[:-1])
    data_dir = os.path.join(data_dir_temp, "exp" + str(1))
    data_model = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    data_model_lst = []
    for j in range(0, 2):  # data_model_i.data_flow.shape[0]
        data_models_j = GagesModel.which_data_model(data_model, j)
        data_model_lst.append(data_models_j)
    gages_model_test = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    data_model_test = GagesModel.compact_data_model(data_model_lst, gages_model_test.data_source)
    print(data_model_test)
def test_test_gages_iter(self):
    data_model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    with torch.cuda.device(1):
        obs_lst = []
        pred_lst = []
        for i in range(0, data_model.data_flow.shape[0]):
            print("\n", "Testing model", str(i + 1), ":\n")
            data_models_i = GagesModel.which_data_model(data_model, i)
            pred, obs = master_test_1by1(data_models_i)
            basin_area = data_models_i.data_source.read_attr(data_models_i.t_s_dict["sites_id"],
                                                             ['DRAIN_SQKM'], is_return_dict=False)
            mean_prep = data_models_i.data_source.read_attr(data_models_i.t_s_dict["sites_id"],
                                                            ['PPTAVG_BASIN'], is_return_dict=False)
            mean_prep = mean_prep / 365 * 10
            pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
            obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
            obs_lst.append(obs.flatten())
            pred_lst.append(pred.flatten())
        preds = np.array(pred_lst)
        obss = np.array(obs_lst)
        flow_pred_file = os.path.join(data_model.data_source.data_config.data_path['Temp'], 'flow_pred')
        flow_obs_file = os.path.join(data_model.data_source.data_config.data_path['Temp'], 'flow_obs')
        serialize_numpy(preds, flow_pred_file)
        serialize_numpy(obss, flow_obs_file)
def test_dam_test(self):
    quick_data_dir = os.path.join(self.config_data.data_path["DB"], "quickdata")
    data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0")
    data_model_train = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
        flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    data_model_test = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    gages_model_train = GagesModel.update_data_model(self.config_data, data_model_train)
    gages_model_test = GagesModel.update_data_model(self.config_data, data_model_test,
                                                    train_stat_dict=gages_model_train.stat_dict)
    nid_dir = os.path.join("/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid", "quickdata")
    nid_input = NidModel.load_nidmodel(nid_dir, nid_file=self.nid_file,
                                       nid_source_file_name='nid_source.txt',
                                       nid_data_file_name='nid_data.shp')
    gage_main_dam_purpose = unserialize_json(os.path.join(nid_dir, "dam_main_purpose_dict.json"))
    data_input = GagesDamDataModel(gages_model_test, nid_input, True, gage_main_dam_purpose)
    gages_input = choose_which_purpose(data_input)
    pred, obs = master_test(gages_input)
    basin_area = gages_input.data_source.read_attr(gages_input.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                                                   is_return_dict=False)
    mean_prep = gages_input.data_source.read_attr(gages_input.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                                                  is_return_dict=False)
    mean_prep = mean_prep / 365 * 10
    pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
    obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
    save_result(gages_input.data_source.data_config.data_path['Temp'], self.test_epoch, pred, obs)
def test_test_gages_iter(self):
    data_config = self.config_data.read_data_config()
    regions = data_config["regions"]
    data_model_test_lst = []
    with torch.cuda.device(1):
        obs_lsts = []
        pred_lsts = []
        for i in range(1, len(regions) + 1):
            data_dir_i_temp = '/'.join(self.config_data.data_path['Temp'].split('/')[:-1])
            data_dir_i = os.path.join(data_dir_i_temp, "exp" + str(i))
            data_model_i = GagesModel.load_datamodel(
                data_dir_i,
                data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
                flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy',
                attr_file_name='test_attr.npy', f_dict_file_name='test_dictFactorize.json',
                var_dict_file_name='test_dictAttribute.json', t_s_dict_file_name='test_dictTimeSpace.json')
            data_model_test_lst.append(data_model_i)
            obs_lst = []
            pred_lst = []
            for j in range(0, data_model_i.data_flow.shape[0]):
                print("\n", "Testing model", str(j + 1), "of", regions[i - 1], "region", ":\n")
                data_models_j = GagesModel.which_data_model(data_model_i, j)
                pred, obs = master_test_1by1(data_models_j)
                basin_area = data_models_j.data_source.read_attr(data_models_j.t_s_dict["sites_id"],
                                                                 ['DRAIN_SQKM'], is_return_dict=False)
                mean_prep = data_models_j.data_source.read_attr(data_models_j.t_s_dict["sites_id"],
                                                                ['PPTAVG_BASIN'], is_return_dict=False)
                mean_prep = mean_prep / 365 * 10
                pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
                obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
                obs_lst.append(obs.flatten())
                pred_lst.append(pred.flatten())
            preds = np.array(pred_lst)
            obss = np.array(obs_lst)
            obs_lsts.append(obss)
            pred_lsts.append(preds)
        obs_final = reduce(lambda a, b: np.vstack((a, b)), obs_lsts)
        pred_final = reduce(lambda a, b: np.vstack((a, b)), pred_lsts)
        serialize_numpy(pred_final, self.flow_pred_file)
        serialize_numpy(obs_final, self.flow_obs_file)
def test_purposes_inds(self):
    quick_data_dir = os.path.join(self.config_data.data_path["DB"], "quickdata")
    data_dir = os.path.join(quick_data_dir, "allnonref-dam_95-05_nan-0.1_00-1.0")
    data_model = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    gages_data_model = GagesModel.update_data_model(self.config_data, data_model)
    nid_dir = os.path.join("/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid", "quickdata")
    gage_main_dam_purpose = unserialize_json(os.path.join(nid_dir, "dam_main_purpose_dict.json"))
    gage_main_dam_purpose_lst = list(gage_main_dam_purpose.values())
    gage_main_dam_purpose_unique = np.unique(gage_main_dam_purpose_lst)
    purpose_regions = {}
    for i in range(gage_main_dam_purpose_unique.size):
        sites_id = []
        for key, value in gage_main_dam_purpose.items():
            if value == gage_main_dam_purpose_unique[i]:
                sites_id.append(key)
        assert (all(x < y for x, y in zip(sites_id, sites_id[1:])))
        purpose_regions[gage_main_dam_purpose_unique[i]] = sites_id
    id_regions_idx = []
    id_regions_sites_ids = []
    df_id_region = np.array(gages_data_model.t_s_dict["sites_id"])
    for key, value in purpose_regions.items():
        gages_id = value
        c, ind1, ind2 = np.intersect1d(df_id_region, gages_id, return_indices=True)
        assert (all(x < y for x, y in zip(ind1, ind1[1:])))
        assert (all(x < y for x, y in zip(c, c[1:])))
        id_regions_idx.append(ind1)
        id_regions_sites_ids.append(c)
    preds, obss, inds_dfs = split_results_to_regions(gages_data_model, self.test_epoch,
                                                     id_regions_idx, id_regions_sites_ids)
    region_names = list(purpose_regions.keys())
    inds_medians = []
    inds_means = []
    for i in range(len(region_names)):
        inds_medians.append(inds_dfs[i].median(axis=0))
        inds_means.append(inds_dfs[i].mean(axis=0))
    print(inds_medians)
    print(inds_means)
def test_dam_train(self):
    quick_data_dir = os.path.join(self.config_data_1.data_path["DB"], "quickdata")
    data_dir = os.path.join(quick_data_dir, "allnonref_85-05_nan-0.1_00-1.0")
    # for inv model, datamodel of train and test are same
    data_model_8595 = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
        flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    t_range1_train = self.config_data_1.model_dict["data"]["tRangeTrain"]
    gages_model1_train = GagesModel.update_data_model(self.config_data_1, data_model_8595,
                                                      t_range_update=t_range1_train, data_attr_update=True)
    t_range2_train = self.config_data_2.model_dict["data"]["tRangeTrain"]
    gages_model2_train = GagesModel.update_data_model(self.config_data_2, data_model_8595,
                                                      t_range_update=t_range2_train, data_attr_update=True)
    nid_dir = os.path.join("/".join(self.config_data_1.data_path["DB"].split("/")[:-1]), "nid", "quickdata")
    nid_input = NidModel.load_nidmodel(nid_dir, nid_file=self.nid_file,
                                       nid_source_file_name='nid_source.txt',
                                       nid_data_file_name='nid_data.shp')
    gage_main_dam_purpose = unserialize_json(os.path.join(nid_dir, "dam_main_purpose_dict.json"))
    gage_main_dam_purpose_lst = list(gage_main_dam_purpose.values())
    gage_main_dam_purpose_unique = np.unique(gage_main_dam_purpose_lst)
    with torch.cuda.device(1):
        for i in range(0, gage_main_dam_purpose_unique.size):
            data_input1 = GagesDamDataModel(gages_model1_train, nid_input, True, gage_main_dam_purpose)
            gages_input1 = choose_which_purpose(data_input1, purpose=gage_main_dam_purpose_unique[i])
            data_input2 = GagesDamDataModel(gages_model2_train, nid_input, True, gage_main_dam_purpose)
            gages_input2 = choose_which_purpose(data_input2, purpose=gage_main_dam_purpose_unique[i])
            data_model = GagesInvDataModel(gages_input1, gages_input2)
            # pre_trained_model_epoch = 165
            train_lstm_inv(data_model)
def test_gages_dam_all_save(self):
    quick_data_dir = os.path.join(self.config_data.data_path["DB"], "quickdata")
    data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0")
    data_model_train = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
        flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    gages_model_train = GagesModel.update_data_model(self.config_data, data_model_train)
    data_model_test = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    gages_model_test = GagesModel.update_data_model(self.config_data, data_model_test,
                                                    train_stat_dict=gages_model_train.stat_dict)
    nid_dir = os.path.join("/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid", "test")
    nid_input = NidModel.load_nidmodel(nid_dir, nid_source_file_name='nid_source.txt',
                                       nid_data_file_name='nid_data.shp')
    gage_main_dam_purpose = unserialize_json(os.path.join(nid_dir, "dam_main_purpose_dict.json"))
    data_input = GagesDamDataModel(gages_model_test, nid_input, gage_main_dam_purpose)
    data_model_dam = choose_which_purpose(data_input)
    save_datamodel(data_model_dam,
                   data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
                   flow_file_name='test_flow', forcing_file_name='test_forcing', attr_file_name='test_attr',
                   f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
                   t_s_dict_file_name='test_dictTimeSpace.json')
def test_forecast_test(self):
    sim_df = GagesModel.load_datamodel(
        self.sim_config_data.data_path["Temp"], "1",
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    df = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"], "2",
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    data_input = GagesForecastDataModel(sim_df, df)
    pred, obs = test_lstm_forecast(data_input)
    pred = pred.reshape(pred.shape[0], pred.shape[1])
    obs = obs.reshape(obs.shape[0], obs.shape[1])
    inds = statError(obs, pred)
    show_me_num = 5
    t_s_dict = data_input.model_data.t_s_dict
    sites = np.array(t_s_dict["sites_id"])
    t_range = np.array(t_s_dict["t_final_range"])
    time_seq_length = data_input.model_data.data_source.data_config.model_dict['model']['seqLength']
    time_start = np.datetime64(t_range[0]) + np.timedelta64(time_seq_length, 'D')
    t_range[0] = np.datetime_as_string(time_start, unit='D')
    ts_fig = plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num)
    ts_fig.savefig(os.path.join(self.config_data.data_path["Out"], "ts_fig.png"))
    # plot boxes with the seaborn library
    keys = ["Bias", "RMSE", "NSE"]
    inds_test = subset_of_dict(inds, keys)
    box_fig = plot_diff_boxes(inds_test)
    box_fig.savefig(os.path.join(self.config_data.data_path["Out"], "box_fig.png"))
    # plot map
    sites_df = pd.DataFrame({"sites": sites, keys[2]: inds_test[keys[2]]})
    plot_ind_map(df.data_source.all_configs['gage_point_file'], sites_df)
def test_plot_1by1(self):
    data_config = self.config_data.read_data_config()
    regions = data_config["regions"]
    data_model_test_lst = []
    obs_lsts = []
    pred_lsts = []
    for i in range(1, len(regions) + 1):
        data_dir_i_temp = '/'.join(self.config_data.data_path['Temp'].split('/')[:-1])
        data_dir_i = os.path.join(data_dir_i_temp, "exp" + str(i))
        data_model_i = GagesModel.load_datamodel(
            data_dir_i,
            data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        data_model_test_lst.append(data_model_i)
        flow_pred_file_i = os.path.join(data_dir_i, 'flow_pred.npy')
        flow_obs_file_i = os.path.join(data_dir_i, 'flow_obs.npy')
        preds = unserialize_numpy(flow_pred_file_i)
        obss = unserialize_numpy(flow_obs_file_i)
        obs_lsts.append(obss)
        pred_lsts.append(preds)
    # pred_final = unserialize_numpy(self.flow_pred_file)
    # obs_final = unserialize_numpy(self.flow_obs_file)
    obs_final = reduce(lambda a, b: np.vstack((a, b)), obs_lsts)
    pred_final = reduce(lambda a, b: np.vstack((a, b)), pred_lsts)
    gages_model_test = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    data_model_test = GagesModel.compact_data_model(data_model_test_lst, gages_model_test.data_source)
    plot_we_need(data_model_test, obs_final, pred_final, id_col="STAID", lon_col="LNG_GAGE", lat_col="LAT_GAGE")
def test_comp_result(self):
    for i in range(self.split_num):
        data_model = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"], str(i),
            data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        data_model_majordam = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"], str(i),
            data_source_file_name='test_data_source_majordam.txt',
            stat_file_name='test_Statistics_majordam.json', flow_file_name='test_flow_majordam.npy',
            forcing_file_name='test_forcing_majordam.npy', attr_file_name='test_attr_majordam.npy',
            f_dict_file_name='test_dictFactorize_majordam.json',
            var_dict_file_name='test_dictAttribute_majordam.json',
            t_s_dict_file_name='test_dictTimeSpace_majordam.json')
        pred, obs = load_result(data_model.data_source.data_config.data_path['Temp'], self.test_epoch)
        pred = pred.reshape(pred.shape[0], pred.shape[1])
        obs = obs.reshape(obs.shape[0], obs.shape[1])
        inds = statError(obs, pred)
        inds['STAID'] = data_model.t_s_dict["sites_id"]
        inds_df = pd.DataFrame(inds)
        pred_majordam, obs_majordam = load_result(
            data_model_majordam.data_source.data_config.data_path['Temp'], self.test_epoch,
            pred_name='flow_pred_majordam', obs_name='flow_obs_majordam')
        pred_majordam = pred_majordam.reshape(pred_majordam.shape[0], pred_majordam.shape[1])
        obs_majordam = obs_majordam.reshape(obs_majordam.shape[0], obs_majordam.shape[1])
        inds_majordam = statError(obs_majordam, pred_majordam)
        inds_majordam['STAID'] = data_model_majordam.t_s_dict["sites_id"]
        inds_majordam_df = pd.DataFrame(inds_majordam)
        keys_nse = "NSE"
        xs = []
        ys = []
        cases_exps_legends_together = ["PUB_test_in_no-major-dam_basins", "PUB_test_in_major-dam_basins"]
        x1, y1 = ecdf(inds_df[keys_nse])
        xs.append(x1)
        ys.append(y1)
        x2, y2 = ecdf(inds_majordam_df[keys_nse])
        xs.append(x2)
        ys.append(y2)
        plot_ecdfs(xs, ys, cases_exps_legends_together)
def test_explore_train_datamodel(self):
    df = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
        flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    data_input = GagesExploreDataModel(df)
    data_models = data_input.cluster_datamodel(num_cluster=self.num_cluster)
    count = 0
    for data_model in data_models:
        print("saving model", str(count + 1), "\n")
        save_datamodel(data_model,
                       data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
                       flow_file_name='flow', forcing_file_name='forcing', attr_file_name='attr',
                       f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
                       t_s_dict_file_name='dictTimeSpace.json')
        count += 1
def test_da_test(self):
    with torch.cuda.device(2):
        df1 = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        data_model = GagesDaDataModel(df1)
        pred, obs = test_lstm_da(data_model, epoch=300)
        basin_area = data_model.data_model.data_source.read_attr(data_model.data_model.t_s_dict["sites_id"],
                                                                 ['DRAIN_SQKM'], is_return_dict=False)
        mean_prep = data_model.data_model.data_source.read_attr(data_model.data_model.t_s_dict["sites_id"],
                                                                ['PPTAVG_BASIN'], is_return_dict=False)
        mean_prep = mean_prep / 365 * 10
        pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
        obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
        flow_pred_file = os.path.join(data_model.data_model.data_source.data_config.data_path['Temp'],
                                      'flow_pred')
        flow_obs_file = os.path.join(data_model.data_model.data_source.data_config.data_path['Temp'],
                                     'flow_obs')
        serialize_numpy(pred, flow_pred_file)
        serialize_numpy(obs, flow_obs_file)
def test_export_result(self):
    data_model = GagesModel.load_datamodel(
        self.config_data.data_path["Temp"],
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    flow_pred_file = os.path.join(data_model.data_source.data_config.data_path['Temp'], 'flow_pred.npy')
    flow_obs_file = os.path.join(data_model.data_source.data_config.data_path['Temp'], 'flow_obs.npy')
    pred = unserialize_numpy(flow_pred_file)
    obs = unserialize_numpy(flow_obs_file)
    pred = pred.reshape(pred.shape[0], pred.shape[1])
    obs = obs.reshape(obs.shape[0], obs.shape[1])
    inds = statError(obs, pred)
    inds['STAID'] = data_model.t_s_dict["sites_id"]
    inds_df = pd.DataFrame(inds)
    inds_df.to_csv(os.path.join(self.config_data.data_path["Out"], 'data_df.csv'))
def test_gages_dam_attr(self):
    quick_data_dir = os.path.join(self.config_data.data_path["DB"], "quickdata")
    data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0")
    df = GagesModel.load_datamodel(
        data_dir,
        data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
        flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
        f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
        t_s_dict_file_name='dictTimeSpace.json')
    # nid_input = NidModel()
    nid_input = NidModel(self.config_data.config_file)
    # nid_dir = os.path.join("/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid", "quickdata")
    nid_dir = os.path.join("/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid", "test")
    save_nidinput(nid_input, nid_dir, nid_source_file_name='nid_source.txt', nid_data_file_name='nid_data.shp')
    data_input = GagesDamDataModel(df, nid_input)
    serialize_json(data_input.gage_main_dam_purpose, os.path.join(nid_dir, "dam_main_purpose_dict.json"))
def test_dam_test(self):
    with torch.cuda.device(0):
        gages_input = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"],
            data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json')
        pred, obs = master_test(gages_input, epoch=cfg.TEST_EPOCH)
        basin_area = gages_input.data_source.read_attr(gages_input.t_s_dict["sites_id"], ['DRAIN_SQKM'],
                                                       is_return_dict=False)
        mean_prep = gages_input.data_source.read_attr(gages_input.t_s_dict["sites_id"], ['PPTAVG_BASIN'],
                                                      is_return_dict=False)
        mean_prep = mean_prep / 365 * 10
        pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
        obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
        save_result(gages_input.data_source.data_config.data_path['Temp'], cfg.TEST_EPOCH, pred, obs)
        plot_we_need(gages_input, obs, pred, id_col="STAID", lon_col="LNG_GAGE", lat_col="LAT_GAGE")
def test_dam_train(self):
    """just test for one purpose as a case"""
    with torch.cuda.device(2):
        quick_data_dir = os.path.join(self.config_data.data_path["DB"], "quickdata")
        data_dir = os.path.join(quick_data_dir, "conus-all_90-10_nan-0.0_00-1.0")
        df = GagesModel.load_datamodel(
            data_dir,
            data_source_file_name='data_source.txt', stat_file_name='Statistics.json',
            flow_file_name='flow.npy', forcing_file_name='forcing.npy', attr_file_name='attr.npy',
            f_dict_file_name='dictFactorize.json', var_dict_file_name='dictAttribute.json',
            t_s_dict_file_name='dictTimeSpace.json')
        nid_dir = os.path.join("/".join(self.config_data.data_path["DB"].split("/")[:-1]), "nid", "quickdata")
        nid_input = NidModel.load_nidmodel(nid_dir, nid_file=self.nid_file,
                                           nid_source_file_name='nid_source.txt',
                                           nid_data_file_name='nid_data.shp')
        gage_main_dam_purpose = unserialize_json(os.path.join(nid_dir, "dam_main_purpose_dict.json"))
        data_input = GagesDamDataModel(df, nid_input, True, gage_main_dam_purpose)
        purpose_chosen = 'C'
        gages_input = choose_which_purpose(data_input, purpose=purpose_chosen)
        master_train(gages_input)
def test_plot_map(self):
    data_model = GagesModel.load_datamodel(
        self.dir_temp,
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    gauge_dict = data_model.data_source.gage_dict
    t_s_dict = unserialize_json(self.t_s_dict_file)
    sites = np.array(t_s_dict["sites_id"])
    keys = ["NSE"]
    inds_test = subset_of_dict(self.inds, keys)
    sites_df = pd.DataFrame({"sites": sites, keys[0]: inds_test[keys[0]]})
    nse_range = [0, 1]
    idx_lstl_nse = sites_df[(sites_df[keys[0]] >= nse_range[0]) &
                            (sites_df[keys[0]] <= nse_range[1])].index.tolist()
    colorbar_size = [0.91, 0.323, 0.02, 0.346]
    # colorbar_size = None
    plot_gages_map(data_model, sites_df, keys[0], idx_lstl_nse, colorbar_size=colorbar_size, cbar_font_size=14)
    plt.savefig(os.path.join(self.dir_out, 'map_NSE.png'), dpi=500, bbox_inches="tight")
    plt.show()
def test_plot_map_cartopy(self):
    data_model = GagesModel.load_datamodel(
        self.dir_temp,
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    show_ind_key = "NSE"
    inds_df = pd.DataFrame(self.inds)
    # nse_range = [-10000, 0]
    nse_range = [0, 1]
    idx_lst_nse = inds_df[(inds_df[show_ind_key] >= nse_range[0]) &
                          (inds_df[show_ind_key] < nse_range[1])].index.tolist()
    plot_gages_map_and_ts(data_model, self.obs, self.pred, inds_df, show_ind_key, idx_lst_nse,
                          pertile_range=[0, 100], plot_ts=False, fig_size=(8, 4), cmap_str="jet")
    plt.show()
def test_plot_delta_map_and_box(self):
    data_model = GagesModel.load_datamodel(
        self.dir_temp,
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    show_ind_key = "NSE"
    inds_df = pd.DataFrame(self.inds)
    inds_df1 = pd.DataFrame(self.inds)
    inds_delta = inds_df - inds_df1
    print(inds_delta)
    inds_df = pd.DataFrame(self.inds)[show_ind_key]
    inds_df_fake = inds_df.copy()
    temp = np.random.uniform(-1, 1, inds_df_fake.size)
    comp_df = inds_df_fake + temp
    delta_nse = (comp_df - inds_df).to_frame()
    delta_range = [-0.9, 0.9]
    idx_lst_delta = delta_nse[(delta_nse[show_ind_key] >= delta_range[0]) &
                              (delta_nse[show_ind_key] < delta_range[1])].index.tolist()
    fig = plot_gages_map_and_box(data_model, delta_nse, show_ind_key, idx_lst=idx_lst_delta,
                                 titles=["NSE map", "NSE boxplot"], wh_ratio=[1, 5], adjust_xy=(0, 0.04))
    # save figure without padding
    # plt.savefig('testmapbox.png', dpi=500, bbox_inches="tight")
    plt.show()
def test_plot_map_and_box(self):
    data_model = GagesModel.load_datamodel(
        self.dir_temp,
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    show_ind_key = "NSE"
    inds_df = pd.DataFrame(self.inds)
    # nse_range = [-10000, 0]
    nse_range = [0, 1]
    idx_lst_nse = inds_df[(inds_df[show_ind_key] >= nse_range[0]) &
                          (inds_df[show_ind_key] < nse_range[1])].index.tolist()
    fig = plot_gages_map_and_box(data_model, inds_df, show_ind_key, idx_lst_nse,
                                 titles=["NSE map", "NSE boxplot"], wh_ratio=[1, 5], adjust_xy=(0, 0.04))
    plt.show()
def test_plot_map_dam(self):
    data_model = GagesModel.load_datamodel(
        self.dir_temp,
        data_source_file_name='test_data_source.txt', stat_file_name='test_Statistics.json',
        flow_file_name='test_flow.npy', forcing_file_name='test_forcing.npy', attr_file_name='test_attr.npy',
        f_dict_file_name='test_dictFactorize.json', var_dict_file_name='test_dictAttribute.json',
        t_s_dict_file_name='test_dictTimeSpace.json')
    usgs_id = data_model.t_s_dict["sites_id"]
    assert (all(x < y for x, y in zip(usgs_id, usgs_id[1:])))
    # attr_dam_lst = ["NDAMS_2009"]
    attr_dam_lst = ["STOR_NOR_2009"]
    data_attr, var_dict, f_dict = data_model.data_source.read_attr(usgs_id, attr_dam_lst)
    show_ind_key_dam = attr_dam_lst[0]
    inds_df_dam_num = pd.DataFrame({show_ind_key_dam: data_attr[:, 0]})
    dam_num_range = [1, 500]
    idx_lst_dam_num = inds_df_dam_num[(inds_df_dam_num[show_ind_key_dam] >= dam_num_range[0]) &
                                      (inds_df_dam_num[show_ind_key_dam] < dam_num_range[1])].index.tolist()
    fig = plot_gages_map_and_box(data_model, inds_df_dam_num, show_ind_key_dam, idx_lst_dam_num,
                                 titles=["dam map", "dam boxplot"], wh_ratio=[1, 5], adjust_xy=(0, 0.04))
    plt.show()