Example #1
 @classmethod
 def load_datamodel(cls, dir_temp_orgin, num_str=None, **kwargs):
     if num_str:
         dir_temp = os.path.join(dir_temp_orgin, num_str)
     else:
         dir_temp = dir_temp_orgin
     data_source_file = os.path.join(dir_temp,
                                     kwargs['data_source_file_name'])
     stat_file = os.path.join(dir_temp, kwargs['stat_file_name'])
     flow_npy_file = os.path.join(dir_temp, kwargs['flow_file_name'])
     forcing_npy_file = os.path.join(dir_temp, kwargs['forcing_file_name'])
     attr_npy_file = os.path.join(dir_temp, kwargs['attr_file_name'])
     f_dict_file = os.path.join(dir_temp, kwargs['f_dict_file_name'])
     var_dict_file = os.path.join(dir_temp, kwargs['var_dict_file_name'])
     t_s_dict_file = os.path.join(dir_temp, kwargs['t_s_dict_file_name'])
     source_data = unserialize_pickle(data_source_file)
     # load the pieces that were saved separately (dicts -> json, arrays -> npy); the data_model was split up this way because serializing it whole is slow
     stat_dict = unserialize_json(stat_file)
     data_flow = unserialize_numpy(flow_npy_file)
     data_forcing = unserialize_numpy(forcing_npy_file)
     data_attr = unserialize_numpy(attr_npy_file)
     # dictFactorize.json explains the values of the categorical variables
     var_dict = unserialize_json(var_dict_file)
     f_dict = unserialize_json(f_dict_file)
     t_s_dict = unserialize_json(t_s_dict_file)
     data_model = cls(source_data, data_flow, data_forcing, data_attr,
                      var_dict, f_dict, stat_dict, t_s_dict)
     return data_model
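A minimal call sketch for the loader above. The keyword names are fixed by the signature; the class name DataModel and all file names except data_source.txt and dictFactorize.json (both of which appear elsewhere on this page) are hypothetical placeholders:

    # hypothetical usage sketch; the file names below are placeholders
    data_model = DataModel.load_datamodel(
        "temp",                                   # dir_temp_orgin
        num_str="1",                              # optional numbered subdirectory
        data_source_file_name="data_source.txt",
        stat_file_name="Statistics.json",
        flow_file_name="flow.npy",
        forcing_file_name="forcing.npy",
        attr_file_name="attr.npy",
        f_dict_file_name="dictFactorize.json",
        var_dict_file_name="var_dict.json",
        t_s_dict_file_name="dictTimeSpace.json")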
Example #2
    def test_trans_all_forcing_file_to_camels(self):
        data_source_dump = os.path.join(self.config_data.data_path["Temp"],
                                        'data_source.txt')
        source_data = unserialize_pickle(data_source_dump)
        output_dir = os.path.join(self.config_data.data_path["DB"],
                                  "basin_mean_forcing", "daymet")
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)
        region_names = [
            region_temp.split("_")[-1]
            for region_temp in source_data.all_configs['regions']
        ]
        # the generated forcing files use the name "allref", so rename region "all" to match
        region_names = ["allref" if r == "all" else r for r in region_names]
        year_start = int(source_data.t_range[0].split("-")[0])
        year_end = int(source_data.t_range[1].split("-")[0])
        years = np.arange(year_start, year_end)  # np.arange excludes year_end
        # gauge IDs must be in strictly ascending order
        assert all(x < y for x, y in zip(source_data.gage_dict['STAID'],
                                         source_data.gage_dict['STAID'][1:]))

        config_dir = definitions.CONFIG_DIR
        for i, region_name in enumerate(region_names):
            config_file_i = os.path.join(
                config_dir, "transdata/config_exp" + str(i + 1) + ".ini")
            subdir_i = "transdata/exp" + str(i + 1)
            config_data_i = GagesConfig.set_subdir(config_file_i, subdir_i)
            source_data_i = GagesSource(
                config_data_i,
                config_data_i.model_dict["data"]["tRangeTrain"],
                screen_basin_area_huc4=False)
            for year in years:
                trans_daymet_to_camels(source_data.all_configs["forcing_dir"],
                                       output_dir, source_data_i.gage_dict,
                                       region_name, year)
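The isdir-then-mkdir pattern above only creates the last path component and can race if two runs start together. A sketch of the equivalent using os.makedirs from the standard library:

    # creates intermediate directories as needed; no-op if the path already exists
    os.makedirs(output_dir, exist_ok=True)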
Example #3
 def test_data_model_test(self):
     source_data = unserialize_pickle(self.data_source_test_file)
     data_model = DataModel(source_data)
     # save data_model piecewise because serializing it whole would be slow: dicts go directly to JSON files, the arrays to npy/HDF5
     serialize_json(data_model.stat_dict, self.stat_file)
     serialize_numpy(data_model.data_flow, self.flow_file)
     serialize_numpy(data_model.data_forcing, self.forcing_file)
     serialize_numpy(data_model.data_attr, self.attr_file)
     # dictFactorize.json explains the values of the categorical variables
     serialize_json(data_model.f_dict, self.f_dict_file)
     serialize_json(data_model.var_dict, self.var_dict_file)
     serialize_json(data_model.t_s_dict, self.t_s_dict_file)
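A quick round-trip check can catch serialization problems early. A sketch assuming the matching unserialize_numpy helper from the other examples on this page, with numpy imported as np:

    # hedged sketch: verify one array survives the save/load round trip
    reloaded_flow = unserialize_numpy(self.flow_file)
    assert np.array_equal(data_model.data_flow, reloaded_flow)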
Example #4
    def test_choose_some_gauge(self):
        ashu_gageid_file = os.path.join(self.config_data.data_path["DB"],
                                        "ashu", "AshuGagesId.txt")

        # farshid_gageid_file = os.path.join(self.config_data.data_path["DB"], "farshid", "sites.csv")
        farshid_gageid_file = os.path.join(self.config_data.data_path["DB"],
                                           "farshid", "gagelist1713.feather")

        # dapeng_v1_gageid_file = os.path.join(self.config_data.data_path["DB"], "dapeng", "v1.csv")
        dapeng_v2_gageid_file = os.path.join(self.config_data.data_path["DB"],
                                             "dapeng", "v2.csv")

        # gauge_df = pd.read_csv(dapeng_v2_gageid_file, dtype={"STAID": str})
        # gauge_list = gauge_df["STAID"].values
        gauge_df = pd.read_feather(farshid_gageid_file)
        gauge_list = gauge_df["site_no"].values

        # np.array(
        #     ['01013500', '01401650', '01585500', '02120780', '02324400', '03139000', '04086600', '05087500',
        #      '05539900', '06468170', '07184000', '08158810', '09404450', '11055800', '12134500', '14166500'])
        data_dir = os.path.join(self.config_data.data_path["DB"],
                                "basin_mean_forcing", "daymet")
        # output_dir = os.path.join(self.config_data.data_path["DB"], "forcing_data_ashu")
        output_dir = os.path.join(self.config_data.data_path["DB"],
                                  "forcing_data_farshid")
        # output_dir = os.path.join(self.config_data.data_path["DB"], "forcing_data_dapeng_v1")
        # output_dir = os.path.join(self.config_data.data_path["DB"], "forcing_data_dapeng_v2")
        if not os.path.isdir(output_dir):
            os.mkdir(output_dir)
        data_source_dump = os.path.join(self.config_data.data_path["Temp"],
                                        'data_source.txt')
        source_data = unserialize_pickle(data_source_dump)
        gageids = np.array(source_data.gage_dict['STAID'])
        xy, x_ind, y_ind = np.intersect1d(gauge_list,
                                          gageids,
                                          return_indices=True)
        # for unique gauge IDs, y_ind already holds these positions in gageids;
        # the np.where scan below recomputes the same indices
        index = np.array([np.where(gageids == i) for i in xy]).flatten()
        print(index)
        for j in index:
            huc_id = source_data.gage_dict['HUC02'][j]
            data_huc_dir = os.path.join(data_dir, huc_id)
            src = os.path.join(
                data_huc_dir, source_data.gage_dict['STAID'][j] +
                '_lump_daymet_forcing_leap.txt')
            output_huc_dir = os.path.join(output_dir, huc_id)
            if not os.path.isdir(output_huc_dir):
                os.mkdir(output_huc_dir)
            dst = os.path.join(
                output_huc_dir, source_data.gage_dict['STAID'][j] +
                '_lump_daymet_forcing_leap.txt')
            print("write into", dst)
            shutil.copy(src, dst)
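Since np.intersect1d already returns the matching positions, the copy loop above can be sketched without the extra np.where scan (assuming unique gauge IDs, which the sorted-STAID assertion in the other examples implies):

    # hedged sketch: same selection, driven directly by y_ind
    _, _, y_ind = np.intersect1d(gauge_list, gageids, return_indices=True)
    for j in y_ind:
        huc_id = source_data.gage_dict['HUC02'][j]
        fname = source_data.gage_dict['STAID'][j] + '_lump_daymet_forcing_leap.txt'
        os.makedirs(os.path.join(output_dir, huc_id), exist_ok=True)
        shutil.copy(os.path.join(data_dir, huc_id, fname),
                    os.path.join(output_dir, huc_id, fname))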
Example #5
 def test_trans_all_forcing_file_to_camels(self):
     """the function need to be run region by region"""
     data_source_dump = os.path.join(self.config_data.data_path["Temp"], 'data_source.txt')
     source_data = unserialize_pickle(data_source_dump)
     output_dir = os.path.join(self.config_data.data_path["DB"], "basin_mean_forcing", "daymet")
     if not os.path.isdir(output_dir):
         os.mkdir(output_dir)
     region_names = [region_temp.split("_")[-1] for region_temp in source_data.all_configs['regions']]
     # the generated forcing files use the name "allref", so rename region "all" to match
     region_names = ["allref" if r == "all" else r for r in region_names]
     year_start = int(source_data.t_range[0].split("-")[0])
     year_end = int(source_data.t_range[1].split("-")[0])
     years = np.arange(year_start, year_end)
     # gauge IDs must be in strictly ascending order
     assert all(x < y for x, y in zip(source_data.gage_dict['STAID'], source_data.gage_dict['STAID'][1:]))
     for year in years:
         trans_daymet_to_camels(source_data.all_configs["forcing_dir"], output_dir, source_data.gage_dict,
                                region_names[0], year)
Example #6
 def test_forecast(self):
     source_data = unserialize_pickle(self.data_source_test_file)
     # load the pieces that were saved separately (see Example #3): dicts from JSON files, arrays from npy
     stat_dict = unserialize_json(self.stat_file)
     data_flow = unserialize_numpy(self.flow_npy_file)
     data_forcing = unserialize_numpy(self.forcing_npy_file)
     data_attr = unserialize_numpy(self.attr_npy_file)
     # dictFactorize.json explains the values of the categorical variables
     var_dict = unserialize_json(self.var_dict_file)
     f_dict = unserialize_json(self.f_dict_file)
     t_s_dict = unserialize_json(self.t_s_dict_file)
     data_model_test = DataModel(source_data, data_flow, data_forcing,
                                 data_attr, var_dict, f_dict, stat_dict,
                                 t_s_dict)
     pred, obs = hydroDL.master_test(data_model_test)
     print(pred)
     print(obs)
     serialize_numpy(pred, self.flow_pred_file)
     serialize_numpy(obs, self.flow_obs_file)
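Once pred and obs are serialized, a simple error metric can be recomputed later from the saved arrays. A sketch using only numpy and the helpers shown above:

    # hedged sketch: reload the saved arrays and compute an overall RMSE
    pred = unserialize_numpy(self.flow_pred_file)
    obs = unserialize_numpy(self.flow_obs_file)
    rmse = np.sqrt(np.nanmean((pred - obs) ** 2))  # nanmean skips missing observations
    print("RMSE:", rmse)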