Example #1
0
    def test_export_result(self):
        """Export the error statistics of the cached test run to ``data_df.csv``.

        The cached data model is reloaded from the "Temp" directory, the saved
        ``flow_pred.npy`` / ``flow_obs.npy`` arrays are evaluated with
        ``statError`` and the resulting table (plus a ``STAID`` column) is
        written into the "Out" directory.
        """
        cached_file_names = dict(
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json',
        )
        data_model = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"], **cached_file_names)

        # Both result arrays live in the Temp directory recorded in the model.
        temp_dir = data_model.data_source.data_config.data_path['Temp']
        pred_raw = unserialize_numpy(os.path.join(temp_dir, 'flow_pred.npy'))
        obs_raw = unserialize_numpy(os.path.join(temp_dir, 'flow_obs.npy'))

        # Collapse to 2-D using the first two axis lengths of each array.
        pred_2d = pred_raw.reshape(pred_raw.shape[0], pred_raw.shape[1])
        obs_2d = obs_raw.reshape(obs_raw.shape[0], obs_raw.shape[1])

        metrics = statError(obs_2d, pred_2d)
        metrics['STAID'] = data_model.t_s_dict["sites_id"]
        metrics_df = pd.DataFrame(metrics)

        out_csv = os.path.join(self.config_data.data_path["Out"],
                               'data_df.csv')
        metrics_df.to_csv(out_csv)
Example #2
0
 def load_datamodel(cls, dir_temp_orgin, num_str=None, **kwargs):
     """Rebuild a data model from its separately serialized parts.

     Args:
         dir_temp_orgin: base directory containing the cached files.
         num_str: optional sub-directory joined onto ``dir_temp_orgin``;
             ignored when falsy.
         **kwargs: file names inside the resolved directory. The keys
             ``data_source_file_name``, ``stat_file_name``,
             ``flow_file_name``, ``forcing_file_name``, ``attr_file_name``,
             ``f_dict_file_name``, ``var_dict_file_name`` and
             ``t_s_dict_file_name`` are all required.

     Returns:
         ``cls(source_data, data_flow, data_forcing, data_attr, var_dict,
         f_dict, stat_dict, t_s_dict)`` built from the loaded files.

     Raises:
         KeyError: if one of the required file-name keywords is missing.
     """
     dir_temp = (os.path.join(dir_temp_orgin, num_str)
                 if num_str else dir_temp_orgin)

     def cached_path(kwarg_name):
         # Resolve the file named by ``kwargs[kwarg_name]`` inside dir_temp.
         return os.path.join(dir_temp, kwargs[kwarg_name])

     # All paths are resolved first, so a missing keyword fails before any
     # file is opened.
     data_source_file = cached_path('data_source_file_name')
     stat_file = cached_path('stat_file_name')
     flow_npy_file = cached_path('flow_file_name')
     forcing_npy_file = cached_path('forcing_file_name')
     attr_npy_file = cached_path('attr_file_name')
     f_dict_file = cached_path('f_dict_file_name')
     var_dict_file = cached_path('var_dict_file_name')
     t_s_dict_file = cached_path('t_s_dict_file_name')

     # NOTE(review): presumably this unpickles the file - only point it at
     # trusted caches; confirm against unserialize_pickle.
     source_data = unserialize_pickle(data_source_file)
     # The model is stored piecewise because serializing it as a whole is
     # slow: dicts -> json, arrays -> npy.
     stat_dict = unserialize_json(stat_file)
     data_flow = unserialize_numpy(flow_npy_file)
     data_forcing = unserialize_numpy(forcing_npy_file)
     data_attr = unserialize_numpy(attr_npy_file)
     # dictFactorize.json explains the values of the categorical variables.
     var_dict = unserialize_json(var_dict_file)
     f_dict = unserialize_json(f_dict_file)
     t_s_dict = unserialize_json(t_s_dict_file)
     return cls(source_data, data_flow, data_forcing, data_attr, var_dict,
                f_dict, stat_dict, t_s_dict)
Example #3
0
    def test_inv_plot(self):
        """Evaluate one epoch of the "2" sub-experiment and draw its plots.

        Steps: reload the cached data model, read the epoch's prediction and
        observation arrays, write the ``statError`` table to ``data_df.csv``,
        then produce a box plot, a time-series plot, an ECDF of NSE and a map.
        """
        cached_file_names = dict(
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json',
        )
        data_model = GagesModel.load_datamodel(
            self.config_data_2.data_path["Temp"], "2", **cached_file_names)

        # Result files are named "epoch<N>flow_pred.npy" / "...flow_obs.npy".
        temp_dir = data_model.data_source.data_config.data_path['Temp']
        epoch_tag = 'epoch' + str(self.test_epoch)
        pred = unserialize_numpy(
            os.path.join(temp_dir, epoch_tag + 'flow_pred.npy'))
        obs = unserialize_numpy(
            os.path.join(temp_dir, epoch_tag + 'flow_obs.npy'))
        # Collapse to 2-D using the first two axis lengths of each array.
        pred = pred.reshape(pred.shape[0], pred.shape[1])
        obs = obs.reshape(obs.shape[0], obs.shape[1])

        inds = statError(obs, pred)
        inds['STAID'] = data_model.t_s_dict["sites_id"]
        inds_df = pd.DataFrame(inds)
        out_dir = self.config_data_2.data_path["Out"]
        inds_df.to_csv(os.path.join(out_dir, 'data_df.csv'))

        # Box plot of the selected statistics (the original comment says the
        # plot uses the seaborn library).
        keys = ["Bias", "RMSE", "NSE"]
        inds_test = subset_of_dict(inds, keys)
        plot_diff_boxes(inds_test).savefig(
            os.path.join(out_dir, "box_fig.png"))

        # Time-series plot.
        show_me_num = 5
        t_s_dict = data_model.t_s_dict
        sites = np.array(t_s_dict["sites_id"])
        t_range = np.array(t_s_dict["t_final_range"])
        # NOTE(review): seqLength is read from config_data_1 while everything
        # else in this test uses config_data_2 - confirm this is intended.
        time_seq_length = self.config_data_1.model_dict['model']['seqLength']
        # Move the start of the range forward by (seqLength - 1) days.
        shifted_start = np.datetime64(t_range[0]) + np.timedelta64(
            time_seq_length - 1, 'D')
        t_range[0] = np.datetime_as_string(shifted_start, unit='D')
        plot_ts_obs_pred(obs, pred, sites, t_range, show_me_num).savefig(
            os.path.join(out_dir, "ts_fig.png"))

        # ECDF of NSE.
        nse_key = keys[2]
        sites_df_nse = pd.DataFrame({
            "sites": sites,
            nse_key: inds_test[nse_key],
        })
        plot_ecdf(sites_df_nse, nse_key)

        # Map of the gauges.
        # NOTE(review): sites_df_nse names its id column "sites" while
        # id_col is "STAID" - verify against plot_map.
        plot_map(data_model.data_source.gage_dict,
                 sites_df_nse,
                 id_col="STAID",
                 lon_col="LNG_GAGE",
                 lat_col="LAT_GAGE")
Example #4
0
def load_result(save_dir, epoch, pred_name='flow_pred', obs_name='flow_obs'):
    """Read the stored prediction and observation arrays of one epoch.

    Both files are looked up in ``save_dir`` under the name
    ``epoch<epoch><name>.npy``.

    Args:
        save_dir: directory holding the result files.
        epoch: epoch number used in the file-name prefix.
        pred_name: base name of the prediction file.
        obs_name: base name of the observation file.

    Returns:
        Tuple ``(pred, obs)`` as returned by ``unserialize_numpy``.
    """
    epoch_tag = 'epoch' + str(epoch)
    pred_path = os.path.join(save_dir, epoch_tag + pred_name + '.npy')
    obs_path = os.path.join(save_dir, epoch_tag + obs_name + '.npy')
    return unserialize_numpy(pred_path), unserialize_numpy(obs_path)
Example #5
0
    def test_plot_1by1(self):
        """Merge the per-region experiment results and plot them together.

        For every configured region ``i`` (1-based) the cached data model and
        the ``flow_pred.npy`` / ``flow_obs.npy`` arrays are loaded from the
        sibling directory ``exp<i>`` of the "Temp" directory. The arrays are
        stacked along the first axis, the data models are merged with
        ``GagesModel.compact_data_model`` and the result is handed to
        ``plot_we_need``.
        """
        data_config = self.config_data.read_data_config()
        regions = data_config["regions"]

        # Same cached file names for every data model loaded below.
        cached_file_names = dict(
            data_source_file_name='test_data_source.txt',
            stat_file_name='test_Statistics.json',
            flow_file_name='test_flow.npy',
            forcing_file_name='test_forcing.npy',
            attr_file_name='test_attr.npy',
            f_dict_file_name='test_dictFactorize.json',
            var_dict_file_name='test_dictAttribute.json',
            t_s_dict_file_name='test_dictTimeSpace.json',
        )

        # Parent of the Temp directory; it does not depend on the region, so
        # it is computed once. The '/'-based split is kept as-is to leave the
        # resulting paths unchanged.
        temp_dir = self.config_data.data_path['Temp']
        exp_root = '/'.join(temp_dir.split('/')[:-1])

        data_model_test_lst = []
        obs_lsts = []
        pred_lsts = []
        for i in range(1, len(regions) + 1):
            data_dir_i = os.path.join(exp_root, "exp" + str(i))
            data_model_test_lst.append(
                GagesModel.load_datamodel(data_dir_i, **cached_file_names))
            pred_lsts.append(
                unserialize_numpy(os.path.join(data_dir_i, 'flow_pred.npy')))
            obs_lsts.append(
                unserialize_numpy(os.path.join(data_dir_i, 'flow_obs.npy')))

        # One vstack over the whole list instead of a pairwise reduce, which
        # re-copied the accumulated array on every step.
        obs_final = np.vstack(obs_lsts)
        pred_final = np.vstack(pred_lsts)

        gages_model_test = GagesModel.load_datamodel(
            self.config_data.data_path["Temp"], **cached_file_names)
        data_model_test = GagesModel.compact_data_model(
            data_model_test_lst, gages_model_test.data_source)
        plot_we_need(data_model_test,
                     obs_final,
                     pred_final,
                     id_col="STAID",
                     lon_col="LNG_GAGE",
                     lat_col="LAT_GAGE")
Example #6
0
 def setUp(self):
     """Load one epoch's results and prepare the NSE statistics.

     Reads ``epoch<test_epoch>flow_pred.npy`` and
     ``epoch<test_epoch>flow_obs.npy`` from ``self.dir_temp``, collapses both
     arrays to 2-D, and stores on the instance: ``pred``, ``obs``, the full
     ``statError`` result (``inds``), the selected ``keys`` and the matching
     subset (``inds_test``).
     """
     self.test_epoch = 20
     epoch_tag = "epoch" + str(self.test_epoch)
     pred = unserialize_numpy(
         os.path.join(self.dir_temp, epoch_tag + 'flow_pred.npy'))
     obs = unserialize_numpy(
         os.path.join(self.dir_temp, epoch_tag + 'flow_obs.npy'))
     self.pred = pred.reshape(pred.shape[0], pred.shape[1])
     # Reshape obs by its OWN leading dimensions. Using pred's shape here
     # would silently re-layout an observation array whose shape differs
     # from the prediction's instead of surfacing the mismatch.
     self.obs = obs.reshape(obs.shape[0], obs.shape[1])
     # Compute the performance statistics.
     self.inds = statError(self.obs, self.pred)
     self.keys = ["NSE"]
     self.inds_test = subset_of_dict(self.inds, self.keys)
Example #7
0
    def test_daymet_avg_from_diff(self):
        """Compare three 365-value series for basin 01013500 and plot them.

        The series come from a locally produced ``.npy`` file ("myself"), from
        column 8 of the CAMELS forcing text file (rows 7305-7669) and from the
        ``tmax`` column of a CSV file ("gee"). Pairwise ``statError1d`` results
        are printed and two line plots are shown.
        """
        own_tmax = unserialize_numpy(
            os.path.join(self.netcdf_dir,
                         "daymet_01013500_mean_2000_myself.npy"))

        camels_file = os.path.join(
            self.dir_db,
            "basin_timeseries_v1p2_metForcing_obsFlow/basin_dataset_public_v1p2/basin_mean_forcing/daymet/01/01013500_lump_cida_forcing_leap.txt")
        camels_table = pd.read_csv(camels_file,
                                   sep=r'\s+',
                                   header=None,
                                   skiprows=4)
        # presumably column 8 is tmax and these rows are year 2000 - TODO confirm
        camels_tmax = camels_table[8].values[7305:7670]

        gee_table = pd.read_csv(
            os.path.join(self.netcdf_dir, "daymet_01013500_mean_2000.csv"))
        gee_tmax = gee_table["tmax"].values

        print()
        for first, second in ((own_tmax, camels_tmax),
                              (own_tmax, gee_tmax),
                              (camels_tmax, gee_tmax)):
            print("Bias, RMSE, NSE", statError1d(first, second))

        import matplotlib.pyplot as plt
        import seaborn as sns
        sns.set(style="whitegrid")

        # One column per source after the transpose.
        values = np.array([own_tmax, camels_tmax, gee_tmax]).T
        print(values)
        # NOTE(review): 2000 is a leap year but only 365 days are indexed -
        # presumably the sources drop one day; confirm.
        dates = pd.date_range("1 1 2000", periods=365, freq="D")
        data = pd.DataFrame(values,
                            index=dates,
                            columns=["myself", "camels", "gee"])
        print(data)

        # First "myself" vs "camels", then "camels" vs "gee".
        for first_col in (0, 1):
            sns.lineplot(data=data.iloc[:, first_col:first_col + 2],
                         palette="tab10",
                         linewidth=2.5)
            plt.show()
Example #8
0
 def test_forecast(self):
     """Run ``hydroDL.master_test`` on the cached test data and save the output.

     The data model is reassembled from its serialized parts, passed to
     ``hydroDL.master_test``, and the returned prediction and observation
     arrays are printed and written to ``self.flow_pred_file`` and
     ``self.flow_obs_file``.
     """
     data_source = unserialize_pickle(self.data_source_test_file)
     # The data model is stored piecewise because serializing it directly is
     # slow: dicts go straight to json files (the original note mentions
     # HDF5 for the data; here the arrays are read with unserialize_numpy).
     statistics = unserialize_json(self.stat_file)
     flow = unserialize_numpy(self.flow_npy_file)
     forcing = unserialize_numpy(self.forcing_npy_file)
     attributes = unserialize_numpy(self.attr_npy_file)
     # dictFactorize.json explains the values of the categorical variables.
     var_dict = unserialize_json(self.var_dict_file)
     factorize_dict = unserialize_json(self.f_dict_file)
     time_space_dict = unserialize_json(self.t_s_dict_file)

     model_under_test = DataModel(data_source, flow, forcing, attributes,
                                  var_dict, factorize_dict, statistics,
                                  time_space_dict)
     predictions, observations = hydroDL.master_test(model_under_test)
     print(predictions)
     print(observations)
     serialize_numpy(predictions, self.flow_pred_file)
     serialize_numpy(observations, self.flow_obs_file)
Example #9
0
 def test_regions_seperate(self):
     """Split the cached results by region and write one statistics CSV each.

     Site ids of the data model are intersected with the ``GAGE_ID`` column of
     every region shapefile; the matching rows of the prediction/observation
     arrays are evaluated with ``statError`` and written to
     ``<region>epoch<test_epoch>data_df.csv`` in the "Out" directory.
     """

     def strictly_increasing(seq):
         # True when every element is smaller than its successor.
         return all(x < y for x, y in zip(seq, seq[1:]))

     cached_file_names = dict(
         data_source_file_name='test_data_source.txt',
         stat_file_name='test_Statistics.json',
         flow_file_name='test_flow.npy',
         forcing_file_name='test_forcing.npy',
         attr_file_name='test_attr.npy',
         f_dict_file_name='test_dictFactorize.json',
         var_dict_file_name='test_dictAttribute.json',
         t_s_dict_file_name='test_dictTimeSpace.json',
     )
     data_model = GagesModel.load_datamodel(
         self.config_data.data_path["Temp"], **cached_file_names)

     all_configs = data_model.data_source.all_configs
     gage_region_dir = all_configs.get("gage_region_dir")
     region_names = all_configs.get("regions")
     shapefiles = [
         os.path.join(gage_region_dir, region_name + '.shp')
         for region_name in region_names
     ]

     site_ids = np.array(data_model.t_s_dict["sites_id"])
     assert strictly_increasing(site_ids)

     # Per region: row indices into the full arrays and the matching ids.
     region_rows = []
     region_site_ids = []
     for shapefile in shapefiles:
         region_gage_ids = gpd.read_file(shapefile)['GAGE_ID'].values
         common_ids, rows_in_all, _ = np.intersect1d(site_ids,
                                                     region_gage_ids,
                                                     return_indices=True)
         assert strictly_increasing(rows_in_all)
         assert strictly_increasing(common_ids)
         region_rows.append(rows_in_all)
         region_site_ids.append(common_ids)

     temp_dir = data_model.data_source.data_config.data_path['Temp']
     pred_all = unserialize_numpy(os.path.join(temp_dir, 'flow_pred.npy'))
     obs_all = unserialize_numpy(os.path.join(temp_dir, 'flow_obs.npy'))
     # Collapse to 2-D using the first two axis lengths of each array.
     pred_all = pred_all.reshape(pred_all.shape[0], pred_all.shape[1])
     obs_all = obs_all.reshape(obs_all.shape[0], obs_all.shape[1])

     for region_name, rows, ids in zip(region_names, region_rows,
                                       region_site_ids):
         region_pred = pred_all[rows, :]
         region_obs = obs_all[rows, :]
         region_inds = statError(region_obs, region_pred)
         region_inds['STAID'] = ids
         region_df = pd.DataFrame(region_inds)
         csv_name = (region_name + "epoch" + str(self.test_epoch) +
                     'data_df.csv')
         region_df.to_csv(
             os.path.join(self.config_data.data_path["Out"], csv_name))