def day_map_plot(config_dict):
    """Render one day's gridded dataset onto a cylindrical-projection map.

    Reads the dataset named in ``config_dict`` from an HDF file, replaces
    fill values with NaN, flips the grid vertically (image rows are
    top-down), draws it with ``PMMap``, and writes the figure to
    ``fig_fp``.

    Expected ``config_dict`` keys: ``hdf_fn``, ``fig_fp``, ``area_extent``,
    ``dataset_name``, ``colorbar_title``, ``title``.
    """
    hdf_fn = config_dict.get("hdf_fn")
    fig_fp = config_dict.get("fig_fp")
    area_extent = config_dict.get("area_extent")
    dataset_name = config_dict.get("dataset_name")
    colorbar_title = config_dict.get("colorbar_title")
    title = config_dict.get("title")

    # area_extent layout appears to be (lat_min, lon_min, lat_max, lon_max)
    # — TODO confirm against the producer of config_dict.
    lat_min, lon_min = area_extent[0], area_extent[1]
    lat_max, lon_max = area_extent[2], area_extent[3]
    center_lon = lon_min + (lon_max - lon_min) / 2
    center_lat = lat_min + (lat_max - lat_min) / 2

    reader = HDFReader(hdf_fn)
    grid = reader.get_dataset(dataset_name)
    fill_value = reader.get_dataset_attr(dataset_name, "FillValue")
    grid[grid == fill_value] = np.nan
    grid = np.flipud(grid)

    drawer = PMMap(lon_min, lat_min, lon_max, lat_max, "cyl",
                   center_lon, center_lat)
    drawer.add_data2D(grid, colorbar_title, title)
    drawer.save_fig(fig_fp)
def day_scatter_plot(config_dict):
    """Draw an actual-vs-predicted scatter plot for one day over an ROI.

    Extracts the ``actual`` and ``pred`` datasets from the HDF file at the
    region-of-interest rows/columns, drops samples where either value is
    the fill value, and saves a scatter plot to ``fig_fp``.

    Expected ``config_dict`` keys: ``fig_fp``, ``hdf_fp``, ``actual``,
    ``pred``, ``area_extent``, ``res``, ``title``, ``x_label``, ``y_label``.
    """
    fig_fp = config_dict.get("fig_fp")
    # BUG FIX: the original called os.makedirs(os.path.basename(fig_fp)),
    # which creates a directory named after the figure *file* in the CWD.
    # The figure's parent directory (dirname) must be created instead;
    # fall back to "." when fig_fp has no directory component.
    os.makedirs(os.path.dirname(fig_fp) or ".", exist_ok=True)

    hdf_fp = config_dict.get("hdf_fp")
    actual = config_dict.get("actual")
    pred = config_dict.get("pred")
    area_extent = config_dict.get("area_extent")
    res = config_dict.get("res")
    title = config_dict.get("title")
    x_label = config_dict.get("x_label")
    y_label = config_dict.get("y_label")

    # Map the ROI extent to global row/column indices.
    dst_lat, dst_lon = get_roi_latlon(area_extent, res)
    gbl_line, gbl_pixel = get_roi_rcs(dst_lat, dst_lon)

    hdf_reader = HDFReader(hdf_fp)
    pred_data = hdf_reader.get_dataset(pred)[gbl_line, gbl_pixel]
    actual_data = hdf_reader.get_dataset(actual)[gbl_line, gbl_pixel]
    # NOTE(review): the fill value is read from the *pred* dataset but also
    # used to mask the actual data — confirm both share one FillValue.
    fill_val = hdf_reader.get_dataset_attr(pred, "FillValue")

    # Keep only samples where both actual and predicted are valid.
    mask = (actual_data != fill_val) & (pred_data != fill_val)
    pred_data = pred_data[mask]
    actual_data = actual_data[mask]

    pm_scatter = PMScatter()
    pm_scatter.draw(actual_data, pred_data, title, x_label, y_label, fig_fp)
def mapping(self, mersi_fn, dataset_name, dst_lon, dst_lat):
    """Warp one MERSI dataset onto the destination grid and register it.

    Reads ``dataset_name`` from ``mersi_fn``, applies mask/scale, resamples
    to the destination lat/lon grid, and stores the warped array together
    with a rebuilt attribute record on this instance.

    Returns the instance's accumulated (dataset dict, attribute dict) pair.
    """
    reader = HDFReader(mersi_fn)
    scaled = self.mask_scale(dataset_name, reader)
    warped = self.down_sample(scaled, dst_lat, dst_lon)

    # Attributes are rebuilt rather than copied: slope/intercept become
    # identity and a fixed FillValue is stamped on the warped product.
    attrs = {
        "long_name": reader.get_dataset_attr(dataset_name, "long_name"),
        "units": reader.get_dataset_attr(dataset_name, "units"),
        "Slope": 1,
        "Intercept": 0,
        "FillValue": -32767,
        "valid_range": self.__valid_range_dict.get(dataset_name),
    }
    self.__dataset_dict[dataset_name] = warped
    self.__dataset_attr_dict[dataset_name] = attrs
    return self.__dataset_dict, self.__dataset_attr_dict
def minmax(self, f):
    """Widen self.vr_dict's per-dataset [min, max] with one file's extrema.

    For every dataset named in ``self.vr_dict`` (mapping name -> [min, max]),
    reads the data from HDF file ``f``, discards fill values, and updates
    the stored bounds in place when the file contains more extreme values.
    Datasets consisting entirely of fill values are skipped.
    """
    hdf_reader = HDFReader(f)
    for name in self.vr_dict:
        data = hdf_reader.get_dataset(name)
        fill_val = hdf_reader.get_dataset_attr(name, "FillValue")
        # Hoisted: the original recomputed data[data != fill_val] three
        # times per dataset; filter once and reuse.
        valid = data[data != fill_val]
        if len(valid) == 0:
            continue  # nothing but fill values in this file
        max_val = valid.max()
        min_val = valid.min()
        if max_val > self.vr_dict[name][1]:
            self.vr_dict[name][1] = max_val
        if min_val < self.vr_dict[name][0]:
            self.vr_dict[name][0] = min_val
def get_roi_mean_df(actual, gbl_line, gbl_pixel, hdf_list, pred):
    """Build a per-date DataFrame of ROI means for actual vs predicted.

    For each HDF file (sorted; filename assumed to begin with a YYYYMMDD
    date), extracts the ``actual`` and ``pred`` datasets at the ROI
    rows/columns, masks fill values to NaN, and records the NaN-ignoring
    mean of each. Rows where either mean is NaN are dropped.

    Returns a DataFrame with columns ["date", actual, pred].
    """
    # BUG FIX: DataFrame.append was deprecated in pandas 1.4 and removed
    # in pandas 2.0 — accumulate plain rows and build the frame once.
    rows = []
    for f in sorted(hdf_list):
        hdf_reader = HDFReader(f)
        date_str = os.path.basename(f)[:8]  # leading YYYYMMDD of the filename
        # NOTE(review): the actual dataset's FillValue is reused to mask the
        # predicted dataset — confirm both share one FillValue.
        fill_value = hdf_reader.get_dataset_attr(actual, "FillValue")
        pm25 = hdf_reader.get_dataset(actual)[gbl_line, gbl_pixel]
        pm25[pm25 == fill_value] = np.nan
        pred_pm25 = hdf_reader.get_dataset(pred)[gbl_line, gbl_pixel]
        pred_pm25[pred_pm25 == fill_value] = np.nan
        rows.append({"date": date_str,
                     actual: np.nanmean(pm25),
                     pred: np.nanmean(pred_pm25)})
    df = pd.DataFrame(rows, columns=["date", actual, pred])
    df = df.dropna()
    return df
def write_result_to_hdf(pred_data, pred_target, cur_fp, out_dir, out_vars):
    """Copy selected datasets from ``cur_fp`` into a new HDF in ``out_dir``,
    inserting the model prediction as an extra dataset.

    The output file keeps the input's basename and global attributes. The
    prediction is stored as "Pred_<pred_target>" with attributes cloned from
    the target dataset (long_name rewritten).
    """
    h5_reader = HDFReader(cur_fp)
    gbl_attrs = h5_reader.get_global_attrs()

    datasets = dict()
    attrs_by_name = dict()
    for name in out_vars:
        values = h5_reader.get_dataset(name)
        if name == "demo_flag":
            # NOTE(review): the prediction is injected on the pass that hits
            # "demo_flag", which fixes its position in insertion order —
            # presumably intentional; verify against the expected layout.
            pred_attrs = h5_reader.get_dataset_attrs(pred_target)
            pred_attrs["long_name"] = pred_target + " from PMNet"
            datasets["Pred_" + pred_target] = pred_data
            attrs_by_name["Pred_" + pred_target] = pred_attrs
        datasets[name] = values
        attrs_by_name[name] = h5_reader.get_dataset_attrs(name)

    os.makedirs(out_dir, exist_ok=True)
    h5_writer = HDFWriter(os.path.join(out_dir, os.path.basename(cur_fp)))
    h5_writer.set_global_attrs(gbl_attrs)
    for name, values in datasets.items():
        h5_writer.create_dataset(name, attrs_by_name[name], values,
                                 values.dtype)
def get_season_year_mean_data_dict(hdf_lst, dataset_name):
    """Compute seasonal and yearly mean grids for one dataset.

    Files are bucketed by the month embedded in their filename (assumed to
    begin with YYYYMMDD): Mar-May -> spring, Jun-Aug -> summer,
    Sep-Nov -> autumn, everything else -> winter. Each season's mean
    ignores fill values (masked to NaN) and is flipped vertically for
    plotting. The "year" entry is the unweighted mean of the four season
    means (not a mean over all files) — this matches the original behavior.

    Returns a dict keyed "spring", "summer", "autumn", "winter", "year".
    """
    # Idiom fix: the original round-tripped str(int(...)) and compared
    # string month names; a direct int -> season lookup is clearer.
    month_to_season = {3: "spring", 4: "spring", 5: "spring",
                       6: "summer", 7: "summer", 8: "summer",
                       9: "autumn", 10: "autumn", 11: "autumn"}
    season_fp_dict = {s: [] for s in ("spring", "summer", "autumn", "winter")}
    for f in hdf_lst:
        month = int(os.path.basename(f)[4:6])
        season_fp_dict[month_to_season.get(month, "winter")].append(f)

    result_dict = dict()
    year_data_lst = []
    for season, files in season_fp_dict.items():
        season_data_lst = []
        for f in files:
            hdf_reader = HDFReader(f)
            pred = hdf_reader.get_dataset(dataset_name)
            fill_value = hdf_reader.get_dataset_attr(dataset_name,
                                                     "FillValue")
            pred[pred == fill_value] = np.nan
            season_data_lst.append(pred)
        # NOTE(review): a season with no files still hits nanmean on an
        # empty stack (warns / yields NaN), as in the original.
        season_mean = np.nanmean(np.asarray(season_data_lst), axis=0)
        season_mean = np.flipud(season_mean)
        result_dict[season] = season_mean
        year_data_lst.append(season_mean)

    result_dict["year"] = np.nanmean(np.asarray(year_data_lst), axis=0)
    return result_dict
def test_HDFReader():
    """Smoke-test HDFReader against a local MERSI AOD granule.

    NOTE(review): depends on a hard-coded Windows path, so this only runs
    on the original author's machine; it prints rather than asserting.
    """
    hdf_fn = (r"D:\01-work_directory\03-PM2.5\PMs\data\MERSI"
              r"\FY3D_MERSI_GBAL_L2_AOD_MLT_GLL_20180817_POAD_5000M_MS.HDF")
    reader = HDFReader(hdf_fn)
    aod = reader.get_dataset("AOT_550_Mean")
    slope = reader.get_dataset_attr("AOT_550_Mean", "Slope")
    print(reader.get_global_attrs())
    print(reader.get_dataset_attrs("AOT_550_Mean"))
    print(aod.shape)
    print(slope)
def dataset_format(ds_fst, scalar_f, out_fd):
    """Re-write each HDF file into ``out_fd``, stamping valid_range attrs.

    For every file in ``ds_fst`` (with a tqdm progress bar), copies all
    datasets, their attributes, and the global attributes to a same-named
    file under ``out_fd``. Datasets listed in the JSON scaler file get
    their "valid_range" attribute replaced by the value loaded from it.
    """
    os.makedirs(out_fd, exist_ok=True)
    valid_ranges = json_loader(scalar_f)
    for src in tqdm.tqdm(ds_fst, ascii=True, desc="dataset format"):
        reader = HDFReader(src)
        writer = HDFWriter(os.path.join(out_fd, os.path.basename(src)))
        writer.set_global_attrs(reader.get_global_attrs())
        for name in reader.get_dataset_names():
            data = reader.get_dataset(name)
            attrs = reader.get_dataset_attrs(name)
            if name in valid_ranges:
                attrs["valid_range"] = valid_ranges[name]
            writer.create_dataset(name, attrs, data, data.dtype)