Example #1
0
    def test_da_test(self):
        """Run the trained DA model on the test set, convert the normalized
        outputs back to physical streamflow, and persist both arrays."""
        with torch.cuda.device(2):
            gages_model = GagesModel.load_datamodel(
                self.config_data.data_path["Temp"],
                data_source_file_name='test_data_source.txt',
                stat_file_name='test_Statistics.json',
                flow_file_name='test_flow.npy',
                forcing_file_name='test_forcing.npy',
                attr_file_name='test_attr.npy',
                f_dict_file_name='test_dictFactorize.json',
                var_dict_file_name='test_dictAttribute.json',
                t_s_dict_file_name='test_dictTimeSpace.json')
            da_model = GagesDaDataModel(gages_model)
            pred, obs = test_lstm_da(da_model, epoch=300)
            inner = da_model.data_model
            site_ids = inner.t_s_dict["sites_id"]
            # drainage area and mean precipitation are needed to undo the
            # basin normalization applied during training
            basin_area = inner.data_source.read_attr(site_ids, ['DRAIN_SQKM'],
                                                     is_return_dict=False)
            mean_prep = inner.data_source.read_attr(site_ids,
                                                    ['PPTAVG_BASIN'],
                                                    is_return_dict=False)
            # presumably converts annual precipitation to a daily depth in
            # different units — TODO confirm the unit convention
            mean_prep = mean_prep / 365 * 10
            pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
            obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
            out_dir = inner.data_source.data_config.data_path['Temp']
            serialize_numpy(pred, os.path.join(out_dir, 'flow_pred'))
            serialize_numpy(obs, os.path.join(out_dir, 'flow_obs'))
Example #2
0
 def test_test_gages_iter(self):
     """Test every basin's model one at a time, denormalize each result to
     physical streamflow, and save the stacked pred/obs arrays."""
     data_model = GagesModel.load_datamodel(
         self.config_data.data_path["Temp"],
         data_source_file_name='test_data_source.txt',
         stat_file_name='test_Statistics.json',
         flow_file_name='test_flow.npy',
         forcing_file_name='test_forcing.npy',
         attr_file_name='test_attr.npy',
         f_dict_file_name='test_dictFactorize.json',
         var_dict_file_name='test_dictAttribute.json',
         t_s_dict_file_name='test_dictTimeSpace.json')
     with torch.cuda.device(1):
         obs_rows = []
         pred_rows = []
         for idx in range(data_model.data_flow.shape[0]):
             print("\n", "Testing model", str(idx + 1), ":\n")
             model_i = GagesModel.which_data_model(data_model, idx)
             pred, obs = master_test_1by1(model_i)
             sites = model_i.t_s_dict["sites_id"]
             # attributes needed to undo basin normalization
             basin_area = model_i.data_source.read_attr(
                 sites, ['DRAIN_SQKM'], is_return_dict=False)
             mean_prep = model_i.data_source.read_attr(
                 sites, ['PPTAVG_BASIN'], is_return_dict=False)
             mean_prep = mean_prep / 365 * 10
             pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
             obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
             obs_rows.append(obs.flatten())
             pred_rows.append(pred.flatten())
         temp_dir = data_model.data_source.data_config.data_path['Temp']
         serialize_numpy(np.array(pred_rows),
                         os.path.join(temp_dir, 'flow_pred'))
         serialize_numpy(np.array(obs_rows),
                         os.path.join(temp_dir, 'flow_obs'))
Example #3
0
    def test_test_gages_iter(self):
        """For every region, test each basin's model individually; stack the
        denormalized results per region, then vstack across regions and save
        the final prediction/observation matrices."""
        data_config = self.config_data.read_data_config()
        regions = data_config["regions"]
        data_model_test_lst = []
        with torch.cuda.device(1):
            obs_lsts = []
            pred_lsts = []
            for region_idx in range(1, len(regions) + 1):
                # each region's test data lives in a sibling "exp<i>" folder
                parent_dir = '/'.join(
                    self.config_data.data_path['Temp'].split('/')[:-1])
                region_dir = os.path.join(parent_dir,
                                          "exp" + str(region_idx))
                region_model = GagesModel.load_datamodel(
                    region_dir,
                    data_source_file_name='test_data_source.txt',
                    stat_file_name='test_Statistics.json',
                    flow_file_name='test_flow.npy',
                    forcing_file_name='test_forcing.npy',
                    attr_file_name='test_attr.npy',
                    f_dict_file_name='test_dictFactorize.json',
                    var_dict_file_name='test_dictAttribute.json',
                    t_s_dict_file_name='test_dictTimeSpace.json')
                data_model_test_lst.append(region_model)
                obs_rows = []
                pred_rows = []
                for basin_idx in range(region_model.data_flow.shape[0]):
                    print("\n", "Testing model", str(basin_idx + 1), "of",
                          regions[region_idx - 1], "region", ":\n")
                    basin_model = GagesModel.which_data_model(
                        region_model, basin_idx)
                    pred, obs = master_test_1by1(basin_model)
                    sites = basin_model.t_s_dict["sites_id"]
                    # attributes needed to undo basin normalization
                    basin_area = basin_model.data_source.read_attr(
                        sites, ['DRAIN_SQKM'], is_return_dict=False)
                    mean_prep = basin_model.data_source.read_attr(
                        sites, ['PPTAVG_BASIN'], is_return_dict=False)
                    mean_prep = mean_prep / 365 * 10
                    pred = _basin_norm(pred, basin_area, mean_prep,
                                       to_norm=False)
                    obs = _basin_norm(obs, basin_area, mean_prep,
                                      to_norm=False)
                    obs_rows.append(obs.flatten())
                    pred_rows.append(pred.flatten())
                obs_lsts.append(np.array(obs_rows))
                pred_lsts.append(np.array(pred_rows))

            # concatenate the per-region matrices row-wise
            obs_final = reduce(lambda a, b: np.vstack((a, b)), obs_lsts)
            pred_final = reduce(lambda a, b: np.vstack((a, b)), pred_lsts)

            serialize_numpy(pred_final, self.flow_pred_file)
            serialize_numpy(obs_final, self.flow_obs_file)
Example #4
0
def save_result(save_dir,
                epoch,
                pred,
                obs,
                pred_name='flow_pred',
                obs_name='flow_obs'):
    """Persist the testing-period prediction and observation arrays.

    Both arrays are written under *save_dir* with file names of the form
    ``epoch<epoch><pred_name>`` and ``epoch<epoch><obs_name>``.
    """
    prefix = 'epoch' + str(epoch)
    serialize_numpy(pred, os.path.join(save_dir, prefix + pred_name))
    serialize_numpy(obs, os.path.join(save_dir, prefix + obs_name))
Example #5
0
 def test_basin_avg_netcdf(self):
     """Read a netCDF file and compute the basin average of a given variable
     over the area delimited by a shapefile.

     NOTE(review): the underlying algorithm could be optimized:
     1. when classifying points, use the shapefile's bounding box to quickly
        discard the many points outside it, testing only those within;
     2. apply roughly the same optimizations as under shp_trans_coord.
     """
     # exercise the routine with one netCDF file and one basin shapefile
     nc_path = self.netcdf_file
     basin_shp = os.path.join(self.shpfile_folder, "01013500.shp")
     basin_mask = os.path.join(self.shpfile_folder, "mask_01013500")
     averages = basin_avg_netcdf(nc_path, basin_shp, basin_mask)
     out_file = os.path.join(self.netcdf_dir,
                             "daymet_01013500_mean_2000_myself")
     serialize_numpy(np.array(averages), out_file)
Example #6
0
    def test_test_gages_sim(self):
        """Load the two staged data models of the sim setup, run the
        natural-flow test, denormalize to physical streamflow, and save the
        epoch-tagged prediction/observation arrays."""
        with torch.cuda.device(2):
            # both stages are serialized with identical file names, under
            # sub-directories "1" and "2" respectively
            load_kwargs = dict(
                data_source_file_name='test_data_source.txt',
                stat_file_name='test_Statistics.json',
                flow_file_name='test_flow.npy',
                forcing_file_name='test_forcing.npy',
                attr_file_name='test_attr.npy',
                f_dict_file_name='test_dictFactorize.json',
                var_dict_file_name='test_dictAttribute.json',
                t_s_dict_file_name='test_dictTimeSpace.json')
            stage1_model = GagesModel.load_datamodel(
                self.config_data_lstm.data_path["Temp"], "1", **load_kwargs)
            stage1_model.update_model_param('train', nEpoch=300)
            stage2_model = GagesModel.load_datamodel(
                self.config_data_lstm.data_path["Temp"], "2", **load_kwargs)
            model_input = GagesSimDataModel(stage1_model, stage2_model)
            pred, obs = master_test_natural_flow(model_input,
                                                 epoch=self.test_epoch)
            second = model_input.data_model2
            sites = second.t_s_dict["sites_id"]
            # attributes needed to undo basin normalization
            basin_area = second.data_source.read_attr(sites, ['DRAIN_SQKM'],
                                                      is_return_dict=False)
            mean_prep = second.data_source.read_attr(sites, ['PPTAVG_BASIN'],
                                                     is_return_dict=False)
            mean_prep = mean_prep / 365 * 10
            pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
            obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)

            out_dir = second.data_source.data_config.data_path['Temp']
            epoch_tag = 'epoch' + str(self.test_epoch)
            serialize_numpy(pred,
                            os.path.join(out_dir, epoch_tag + 'flow_pred'))
            serialize_numpy(obs,
                            os.path.join(out_dir, epoch_tag + 'flow_obs'))
Example #7
0
 def test_forecast(self):
     """Reassemble a DataModel from its serialized parts, run master_test,
     and persist the resulting predictions and observations."""
     src = unserialize_pickle(self.data_source_test_file)
     # The data model is stored piecewise because serializing it whole is
     # slow: dicts go to JSON files, numpy arrays to npy/HDF5 files.
     stats = unserialize_json(self.stat_file)
     flow = unserialize_numpy(self.flow_npy_file)
     forcing = unserialize_numpy(self.forcing_npy_file)
     attrs = unserialize_numpy(self.attr_npy_file)
     # dictFactorize.json explains the values of the categorical variables
     var_dict = unserialize_json(self.var_dict_file)
     f_dict = unserialize_json(self.f_dict_file)
     t_s_dict = unserialize_json(self.t_s_dict_file)
     model = DataModel(src, flow, forcing, attrs, var_dict, f_dict, stats,
                       t_s_dict)
     pred, obs = hydroDL.master_test(model)
     print(pred)
     print(obs)
     serialize_numpy(pred, self.flow_pred_file)
     serialize_numpy(obs, self.flow_obs_file)
Example #8
0
def save_datamodel(data_model, num_str=None, **kwargs):
    """Serialize every part of *data_model* under its Temp data directory.

    The model is stored piecewise because serializing it whole is slow:
    the data source is pickled, dicts go to JSON, arrays to npy files.

    Parameters
    ----------
    data_model : model object exposing ``data_source``, ``stat_dict``,
        ``data_flow``, ``data_forcing``, ``data_attr``, ``f_dict``,
        ``var_dict`` and ``t_s_dict``
    num_str : optional sub-directory name appended to the Temp path
    kwargs : required file-name keys: ``data_source_file_name``,
        ``stat_file_name``, ``flow_file_name``, ``forcing_file_name``,
        ``attr_file_name``, ``f_dict_file_name``, ``var_dict_file_name``,
        ``t_s_dict_file_name``

    Raises
    ------
    KeyError if any of the required file-name kwargs is missing (raised
    before anything is written).
    """
    if num_str:
        dir_temp = os.path.join(
            data_model.data_source.data_config.data_path["Temp"], num_str)
    else:
        dir_temp = data_model.data_source.data_config.data_path["Temp"]
    # exist_ok avoids the check-then-create race of isdir()+makedirs()
    os.makedirs(dir_temp, exist_ok=True)
    # resolve all paths first so a missing kwarg fails before any write
    data_source_file = os.path.join(dir_temp, kwargs['data_source_file_name'])
    stat_file = os.path.join(dir_temp, kwargs['stat_file_name'])
    flow_file = os.path.join(dir_temp, kwargs['flow_file_name'])
    forcing_file = os.path.join(dir_temp, kwargs['forcing_file_name'])
    attr_file = os.path.join(dir_temp, kwargs['attr_file_name'])
    f_dict_file = os.path.join(dir_temp, kwargs['f_dict_file_name'])
    var_dict_file = os.path.join(dir_temp, kwargs['var_dict_file_name'])
    t_s_dict_file = os.path.join(dir_temp, kwargs['t_s_dict_file_name'])
    serialize_pickle(data_model.data_source, data_source_file)
    serialize_json(data_model.stat_dict, stat_file)
    serialize_numpy(data_model.data_flow, flow_file)
    serialize_numpy(data_model.data_forcing, forcing_file)
    serialize_numpy(data_model.data_attr, attr_file)
    # dictFactorize.json is the explanation of value of categorical variables
    serialize_json(data_model.f_dict, f_dict_file)
    serialize_json(data_model.var_dict, var_dict_file)
    serialize_json(data_model.t_s_dict, t_s_dict_file)
Example #9
0
 def test_Susquehanna(self):
     """Build the Susquehanna data model for the test period, run the trained
     model, save the flow arrays, and plot the results."""
     test_range = self.config_data.model_dict["data"]["tRangeTest"]
     source_data = SusquehannaSource(self.config_data, test_range)
     # build the input data-model object
     data_model = SusquehannaModel(source_data)
     with torch.cuda.device(1):
         pred, obs = master_test(data_model, epoch=300)
         temp_dir = data_model.data_source.data_config.data_path['Temp']
         serialize_numpy(pred, os.path.join(temp_dir, 'flow_pred'))
         serialize_numpy(obs, os.path.join(temp_dir, 'flow_obs'))
         plot_we_need(data_model,
                      obs,
                      pred,
                      id_col="id",
                      lon_col="lon",
                      lat_col="lat")
Example #10
0
 def test_test_camels(self):
     """Run the trained model over the CAMELS test set, denormalize the flow,
     save the arrays, and plot the results."""
     data_model = CamelsModel.load_datamodel(
         self.config_data.data_path["Temp"],
         data_source_file_name='test_data_source.txt',
         stat_file_name='test_Statistics.json',
         flow_file_name='test_flow.npy',
         forcing_file_name='test_forcing.npy',
         attr_file_name='test_attr.npy',
         f_dict_file_name='test_dictFactorize.json',
         var_dict_file_name='test_dictAttribute.json',
         t_s_dict_file_name='test_dictTimeSpace.json')
     with torch.cuda.device(1):
         pred, obs = master_test(data_model, epoch=300)
         sites = data_model.t_s_dict["sites_id"]
         # attributes needed to undo basin normalization
         basin_area = data_model.data_source.read_attr(
             sites, ['area_gages2'], is_return_dict=False)
         mean_prep = data_model.data_source.read_attr(
             sites, ['p_mean'], is_return_dict=False)
         pred = _basin_norm(pred, basin_area, mean_prep, to_norm=False)
         obs = _basin_norm(obs, basin_area, mean_prep, to_norm=False)
         temp_dir = data_model.data_source.data_config.data_path['Temp']
         serialize_numpy(pred, os.path.join(temp_dir, 'flow_pred'))
         serialize_numpy(obs, os.path.join(temp_dir, 'flow_obs'))
         plot_we_need(data_model,
                      obs,
                      pred,
                      id_col="id",
                      lon_col="lon",
                      lat_col="lat")
Example #11
0
 def test_data_model_test(self):
     """Build a DataModel from the pickled test data source and serialize
     each of its parts to disk.

     The model is stored piecewise because serializing it whole is slow:
     dicts go to JSON files, numpy arrays to npy/HDF5 files.
     """
     source_data = unserialize_pickle(self.data_source_test_file)
     model = DataModel(source_data)
     serialize_json(model.stat_dict, self.stat_file)
     for arr, path in ((model.data_flow, self.flow_file),
                       (model.data_forcing, self.forcing_file),
                       (model.data_attr, self.attr_file)):
         serialize_numpy(arr, path)
     # dictFactorize.json explains the values of the categorical variables
     serialize_json(model.f_dict, self.f_dict_file)
     serialize_json(model.var_dict, self.var_dict_file)
     serialize_json(model.t_s_dict, self.t_s_dict_file)