def load_datamodel(cls, dir_temp_orgin, num_str=None, **kwargs):
    if num_str:
        dir_temp = os.path.join(dir_temp_orgin, num_str)
    else:
        dir_temp = dir_temp_orgin
    data_source_file = os.path.join(dir_temp, kwargs['data_source_file_name'])
    stat_file = os.path.join(dir_temp, kwargs['stat_file_name'])
    flow_npy_file = os.path.join(dir_temp, kwargs['flow_file_name'])
    forcing_npy_file = os.path.join(dir_temp, kwargs['forcing_file_name'])
    attr_npy_file = os.path.join(dir_temp, kwargs['attr_file_name'])
    f_dict_file = os.path.join(dir_temp, kwargs['f_dict_file_name'])
    var_dict_file = os.path.join(dir_temp, kwargs['var_dict_file_name'])
    t_s_dict_file = os.path.join(dir_temp, kwargs['t_s_dict_file_name'])
    source_data = unserialize_pickle(data_source_file)
    # the data_model was serialized piece by piece because serializing the whole
    # object at once is slow: dicts are stored as JSON, arrays as npy
    stat_dict = unserialize_json(stat_file)
    data_flow = unserialize_numpy(flow_npy_file)
    data_forcing = unserialize_numpy(forcing_npy_file)
    data_attr = unserialize_numpy(attr_npy_file)
    # dictFactorize.json explains the values of the categorical variables
    var_dict = unserialize_json(var_dict_file)
    f_dict = unserialize_json(f_dict_file)
    t_s_dict = unserialize_json(t_s_dict_file)
    data_model = cls(source_data, data_flow, data_forcing, data_attr, var_dict,
                     f_dict, stat_dict, t_s_dict)
    return data_model
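# A minimal usage sketch for load_datamodel, assuming it is a classmethod of
# DataModel (the `cls` parameter suggests this, but the decorator is not shown);
# the directory and concrete file names below are assumptions for illustration,
# only 'data_source.txt' and 'dictFactorize.json' appear elsewhere in this code:
#
#     data_model = DataModel.load_datamodel(
#         "temp/exp1",
#         data_source_file_name='data_source.txt',
#         stat_file_name='Statistics.json',
#         flow_file_name='flow.npy',
#         forcing_file_name='forcing.npy',
#         attr_file_name='attr.npy',
#         f_dict_file_name='dictFactorize.json',
#         var_dict_file_name='dictAttribute.json',
#         t_s_dict_file_name='dictTimeSpace.json')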
def test_trans_all_forcing_file_to_camels(self):
    data_source_dump = os.path.join(self.config_data.data_path["Temp"], 'data_source.txt')
    source_data = unserialize_pickle(data_source_dump)
    output_dir = os.path.join(self.config_data.data_path["DB"], "basin_mean_forcing", "daymet")
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    region_names = [region_temp.split("_")[-1] for region_temp in source_data.all_configs['regions']]
    # the generated forcing data files use the name "allref", so rename the "all" region
    region_names = ["allref" if r == "all" else r for r in region_names]
    year_start = int(source_data.t_range[0].split("-")[0])
    year_end = int(source_data.t_range[1].split("-")[0])
    years = np.arange(year_start, year_end)
    # the gauge ids must be in strictly ascending order
    assert all(x < y for x, y in zip(source_data.gage_dict['STAID'], source_data.gage_dict['STAID'][1:]))
    config_dir = definitions.CONFIG_DIR
    for i in range(len(region_names)):
        config_file_i = os.path.join(config_dir, "transdata/config_exp" + str(i + 1) + ".ini")
        subdir_i = "transdata/exp" + str(i + 1)
        config_data_i = GagesConfig.set_subdir(config_file_i, subdir_i)
        source_data_i = GagesSource(config_data_i,
                                    config_data_i.model_dict["data"]["tRangeTrain"],
                                    screen_basin_area_huc4=False)
        for year in years:
            trans_daymet_to_camels(source_data.all_configs["forcing_dir"], output_dir,
                                   source_data_i.gage_dict, region_names[i], year)
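# Note: this variant builds a GagesConfig/GagesSource per region from
# transdata/config_exp{i}.ini, so a single run covers every region; the
# test_trans_all_forcing_file_to_camels variant further below instead
# processes one region (region_names[0]) per run.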
def test_data_model_test(self):
    source_data = unserialize_pickle(self.data_source_test_file)
    data_model = DataModel(source_data)
    # serialize the data_model piece by piece because serializing the whole object
    # at once is slow: dicts go to JSON files, numpy arrays to npy files
    serialize_json(data_model.stat_dict, self.stat_file)
    serialize_numpy(data_model.data_flow, self.flow_file)
    serialize_numpy(data_model.data_forcing, self.forcing_file)
    serialize_numpy(data_model.data_attr, self.attr_file)
    # dictFactorize.json explains the values of the categorical variables
    serialize_json(data_model.f_dict, self.f_dict_file)
    serialize_json(data_model.var_dict, self.var_dict_file)
    serialize_json(data_model.t_s_dict, self.t_s_dict_file)
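# The inverse of this round trip is load_datamodel above, which reads the same
# pickle/JSON/npy pieces back and rebuilds the DataModel; test_forecast below
# performs the same reconstruction manually with the unserialize_* helpers.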
def test_choose_some_gauge(self):
    ashu_gageid_file = os.path.join(self.config_data.data_path["DB"], "ashu", "AshuGagesId.txt")
    # farshid_gageid_file = os.path.join(self.config_data.data_path["DB"], "farshid", "sites.csv")
    farshid_gageid_file = os.path.join(self.config_data.data_path["DB"], "farshid", "gagelist1713.feather")
    # dapeng_v1_gageid_file = os.path.join(self.config_data.data_path["DB"], "dapeng", "v1.csv")
    dapeng_v2_gageid_file = os.path.join(self.config_data.data_path["DB"], "dapeng", "v2.csv")
    # gauge_df = pd.read_csv(dapeng_v2_gageid_file, dtype={"STAID": str})
    # gauge_list = gauge_df["STAID"].values
    gauge_df = pd.read_feather(farshid_gageid_file)
    gauge_list = gauge_df["site_no"].values
    # np.array(
    #     ['01013500', '01401650', '01585500', '02120780', '02324400', '03139000', '04086600', '05087500',
    #      '05539900', '06468170', '07184000', '08158810', '09404450', '11055800', '12134500', '14166500'])
    data_dir = os.path.join(self.config_data.data_path["DB"], "basin_mean_forcing", "daymet")
    # output_dir = os.path.join(self.config_data.data_path["DB"], "forcing_data_ashu")
    output_dir = os.path.join(self.config_data.data_path["DB"], "forcing_data_farshid")
    # output_dir = os.path.join(self.config_data.data_path["DB"], "forcing_data_dapeng_v1")
    # output_dir = os.path.join(self.config_data.data_path["DB"], "forcing_data_dapeng_v2")
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    data_source_dump = os.path.join(self.config_data.data_path["Temp"], 'data_source.txt')
    source_data = unserialize_pickle(data_source_dump)
    gageids = np.array(source_data.gage_dict['STAID'])
    xy, x_ind, y_ind = np.intersect1d(gauge_list, gageids, return_indices=True)
    index = np.array([np.where(gageids == i) for i in xy]).flatten()
    print(index)
    for j in index:
        huc_id = source_data.gage_dict['HUC02'][j]
        data_huc_dir = os.path.join(data_dir, huc_id)
        src = os.path.join(data_huc_dir,
                           source_data.gage_dict['STAID'][j] + '_lump_daymet_forcing_leap.txt')
        output_huc_dir = os.path.join(output_dir, huc_id)
        if not os.path.isdir(output_huc_dir):
            os.mkdir(output_huc_dir)
        dst = os.path.join(output_huc_dir,
                           source_data.gage_dict['STAID'][j] + '_lump_daymet_forcing_leap.txt')
        print("write into", dst)
        shutil.copy(src, dst)
def test_trans_all_forcing_file_to_camels(self):
    """This function needs to be run region by region."""
    data_source_dump = os.path.join(self.config_data.data_path["Temp"], 'data_source.txt')
    source_data = unserialize_pickle(data_source_dump)
    output_dir = os.path.join(self.config_data.data_path["DB"], "basin_mean_forcing", "daymet")
    if not os.path.isdir(output_dir):
        os.mkdir(output_dir)
    region_names = [region_temp.split("_")[-1] for region_temp in source_data.all_configs['regions']]
    # the generated forcing data files use the name "allref", so rename the "all" region
    region_names = ["allref" if r == "all" else r for r in region_names]
    year_start = int(source_data.t_range[0].split("-")[0])
    year_end = int(source_data.t_range[1].split("-")[0])
    years = np.arange(year_start, year_end)
    # the gauge ids must be in strictly ascending order
    assert all(x < y for x, y in zip(source_data.gage_dict['STAID'], source_data.gage_dict['STAID'][1:]))
    for year in years:
        trans_daymet_to_camels(source_data.all_configs["forcing_dir"], output_dir,
                               source_data.gage_dict, region_names[0], year)
def test_forecast(self):
    source_data = unserialize_pickle(self.data_source_test_file)
    # the data_model was serialized piece by piece because serializing the whole
    # object at once is slow (dicts as JSON, numpy arrays as npy), so load each
    # part separately and rebuild it here
    stat_dict = unserialize_json(self.stat_file)
    data_flow = unserialize_numpy(self.flow_npy_file)
    data_forcing = unserialize_numpy(self.forcing_npy_file)
    data_attr = unserialize_numpy(self.attr_npy_file)
    # dictFactorize.json explains the values of the categorical variables
    var_dict = unserialize_json(self.var_dict_file)
    f_dict = unserialize_json(self.f_dict_file)
    t_s_dict = unserialize_json(self.t_s_dict_file)
    data_model_test = DataModel(source_data, data_flow, data_forcing, data_attr,
                                var_dict, f_dict, stat_dict, t_s_dict)
    pred, obs = hydroDL.master_test(data_model_test)
    print(pred)
    print(obs)
    serialize_numpy(pred, self.flow_pred_file)
    serialize_numpy(obs, self.flow_obs_file)
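# A minimal follow-up sketch, assuming the serialized predictions are evaluated
# in a separate step (the attribute names reuse those above; the reload itself
# is an assumption, not part of this test):
#
#     pred = unserialize_numpy(self.flow_pred_file)
#     obs = unserialize_numpy(self.flow_obs_file)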