def multi_plot_regression(date_start, date_end, frequency='daily'):
    """Accumulate regression statistics over a date range and write them to CSV.

    Steps through [date_start, date_end] at the requested frequency, collects
    the dict returned by plot_verification_picture for each step, and dumps
    the collected columns as one CSV under AOD_PICTURE_DIR/STATS/MATCH.

    :param date_start: str, first date, "YYYYMMDD"
    :param date_end: str, last date (inclusive), "YYYYMMDD"
    :param frequency: str, 'daily' / 'monthly' / 'seasonly' / 'yearly'
    """
    dt_cursor = datetime.strptime(date_start, "%Y%m%d")
    dt_stop = datetime.strptime(date_end, "%Y%m%d")

    # Step size per frequency; an unknown frequency ends the loop after the
    # first iteration (same behavior as the original if/elif chain's `break`).
    step_by_frequency = {
        'daily': relativedelta(days=1),
        'monthly': relativedelta(months=1),
        'seasonly': relativedelta(months=3),
        'yearly': relativedelta(years=1),
    }

    collected = defaultdict(list)
    while dt_cursor <= dt_stop:
        datas = plot_verification_picture(dt_cursor.strftime("%Y%m%d"),
                                          date_end=date_end,
                                          frequency=frequency)
        if datas:
            for key, value in datas.items():
                collected[key].append(value)
        if frequency not in step_by_frequency:
            break  # unsupported frequency: stop stepping
        dt_cursor = dt_cursor + step_by_frequency[frequency]

    frame = pd.DataFrame(collected)
    out_dir = os.path.join(AOD_PICTURE_DIR, 'STATS', MATCH)
    make_sure_path_exists(out_dir)
    out_file = os.path.join(
        out_dir, '{}_{}_{}_{}.csv'.format(AREA, frequency, date_start, date_end))
    frame.to_csv(out_file, index=False)
    print('>>> {}'.format(out_file))
def match_file():
    """Pair FY3D 1KM AOD granules with FY4A 4KM granules scanned within ±5 minutes.

    Walks both data directories, groups files by date string (YYYYMMDD parsed
    from the filenames), and for every time-coincident pair runs
    match_fy3d_1km_fy4a_4km, skipping pairs whose output file already exists.
    """
    # FY3D_MERSI_ORBT_L2_AOD_MLT_NUL_20190102_0500_1000M_MS.HDF
    fy3d_file_dict = defaultdict(list)
    for root, dirs, files in os.walk(AOD_FY3D_1KM_DIR):
        for name in files:
            if name[-3:].lower() != 'hdf':
                continue
            ymd, hm = name.split('_')[7:9]
            date_str = ymd
            fy3d_file_dict[date_str].append(os.path.join(root, name))

    # SATE_FY4A_AGRI_MULT_4000M_DISK_LDA_GLL_HYT_20190101000000_VPRJ5_L2.NC
    fy4a_file_dict = defaultdict(list)
    for root, dirs, files in os.walk(AOD_FY4A_4KM_DIR):
        for name in files:
            if name[-2:].lower() != 'nc':
                continue
            date_str = name.split('_')[9][0:8]
            fy4a_file_dict[date_str].append(os.path.join(root, name))

    # Hoisted: the match directory is loop-invariant; the original rebuilt
    # and re-created it inside the innermost loop.  (This may create the
    # directory even when no pair matches, which is benign.)
    match_dir = os.path.join(AOD_MATCH_DIR, "FY3D_1KM_FY4A_4KM")
    make_sure_path_exists(match_dir)

    for fy3d_date_str in fy3d_file_dict.keys():
        if fy3d_date_str not in fy4a_file_dict:
            continue
        fy3d_files = fy3d_file_dict[fy3d_date_str]
        fy4a_files = fy4a_file_dict[fy3d_date_str]
        for fy3d_file in fy3d_files:
            # These values depend only on fy3d_file; compute them once instead
            # of once per FY4A candidate (hoisted loop-invariant work).
            ymdhm_fy3d = os.path.basename(fy3d_file).split('_')[7:9]
            datetime_fy3d = datetime.strptime(''.join(ymdhm_fy3d),
                                              '%Y%m%d%H%M')
            datetime_start = datetime_fy3d - relativedelta(minutes=5)
            datetime_end = datetime_fy3d + relativedelta(minutes=5)
            geo_fy3d_name = 'FY3D_MERSI_GBAL_L1_{}_{}_GEO1K_MS.HDF'.format(
                ymdhm_fy3d[0], ymdhm_fy3d[1])
            fy3d_geo_file = os.path.join(GEO_FY3D_1KM_DIR, geo_fy3d_name)
            for fy4a_file in fy4a_files:
                ymdhms_fy4a = os.path.basename(fy4a_file).split('_')[9][0:12]
                datetime_fy4a = datetime.strptime(ymdhms_fy4a, '%Y%m%d%H%M')
                # keep only FY4A granules within ±5 minutes of the FY3D scan
                if not (datetime_start <= datetime_fy4a <= datetime_end):
                    continue
                out_file = get_out_file(fy3d_file, fy4a_file, match_dir)
                if os.path.isfile(out_file):
                    print('already exist {}'.format(out_file))
                    continue
                match_fy3d_1km_fy4a_4km(fy3d_file, fy3d_geo_file, fy4a_file,
                                        out_file,
                                        longitude_range=LONGITUDE_RANGE,
                                        latitude_range=LATITUDE_RANGE)
def match_file():
    """Pair FY3D 1KM AOD granules with MODIS 3KM granules scanned within ±60 minutes.

    Walks both data directories, groups files by date (YYYYMMDD), and for every
    time-coincident pair runs match_fy3d_1km_modis_3km, skipping pairs whose
    output file already exists.
    """
    print("<<< === {}".format(AOD_FY3D_1KM_DIR))
    print("<<< === {}".format(AOD_MODIS_3KM_DIR))
    fy3d_file_dict = defaultdict(list)
    for root, dirs, files in os.walk(AOD_FY3D_1KM_DIR):
        for name in files:
            if name[-3:].lower() != 'hdf':
                continue
            date_str = name.split('_')[7]
            fy3d_file_dict[date_str].append(os.path.join(root, name))

    modis_file_dict = defaultdict(list)
    for root, dirs, files in os.walk(AOD_MODIS_3KM_DIR):
        for name in files:
            if name[-3:].lower() != 'hdf':
                continue
            # MODIS filenames carry the date as year + day-of-year (e.g. A2019002)
            date_j_str = name.split('.')[1][1:]
            date_str = datetime.strptime(date_j_str,
                                         "%Y%j").strftime("%Y%m%d")
            modis_file_dict[date_str].append(os.path.join(root, name))

    # Hoisted: the match directory is loop-invariant; the original rebuilt
    # and re-created it inside the innermost loop.
    match_dir = os.path.join(AOD_MATCH_DIR, "FY3D_1KM_MODIS_3KM")
    make_sure_path_exists(match_dir)

    for fy3d_date_str in fy3d_file_dict.keys():
        if fy3d_date_str not in modis_file_dict:
            continue
        for fy3d_file in fy3d_file_dict[fy3d_date_str]:
            # Per-FY3D values hoisted out of the MODIS loop (loop-invariant).
            # Also reuses ymdhm_fy3d for the GEO name instead of re-splitting
            # the filename a second time as the original did (ymd_hm).
            ymdhm_fy3d = os.path.basename(fy3d_file).split('_')[7:9]
            datetime_fy3d = datetime.strptime(''.join(ymdhm_fy3d),
                                              '%Y%m%d%H%M')
            datetime_start = datetime_fy3d - relativedelta(minutes=60)
            datetime_end = datetime_fy3d + relativedelta(minutes=60)
            geo_fy3d_name = 'FY3D_MERSI_GBAL_L1_{}_{}_GEO1K_MS.HDF'.format(
                ymdhm_fy3d[0], ymdhm_fy3d[1])
            fy3d_geo_file = os.path.join(GEO_FY3D_1KM_DIR, geo_fy3d_name)
            for modis_file in modis_file_dict[fy3d_date_str]:
                ymdhm_modis = os.path.basename(modis_file).split('.')[1:3]
                date_j_str = ymdhm_modis[0][1:] + ymdhm_modis[1]
                datetime_modis = datetime.strptime(date_j_str, "%Y%j%H%M")
                # keep only MODIS granules within ±60 minutes of the FY3D scan
                if not (datetime_start <= datetime_modis <= datetime_end):
                    continue
                out_file = get_out_file(fy3d_file, modis_file, match_dir)
                if os.path.isfile(out_file):
                    print('already exist {}'.format(out_file))
                    continue
                match_fy3d_1km_modis_3km(fy3d_file, fy3d_geo_file, modis_file,
                                         out_file,
                                         longitude_range=LONGITUDE_RANGE,
                                         latitude_range=LATITUDE_RANGE)
def match_file():
    """Pair FY3D 5KM daily AOD files with MODIS 10KM files sharing the same date.

    Unlike the orbital matchers there is no time-window test here: every
    same-date (FY3D, MODIS) pair is matched via match_fy3d_5km_modis_10km,
    skipping pairs whose output file already exists.
    """
    fy3d_file_dict = defaultdict(list)
    for root, dirs, files in os.walk(AOD_FY3D_5KM_DIR):
        for name in files:
            if name[-3:].lower() != 'hdf':
                continue
            date_str = name.split('_')[7]
            fy3d_file_dict[date_str].append(os.path.join(root, name))

    modis_file_dict = defaultdict(list)
    for root, dirs, files in os.walk(AOD_MODIS_10KM_DIR):
        for name in files:
            if name[-3:].lower() != 'hdf':
                continue
            # MODIS filenames carry the date as year + day-of-year
            date_j_str = name.split('.')[1][1:]
            date_str = datetime.strptime(date_j_str,
                                         "%Y%j").strftime("%Y%m%d")
            modis_file_dict[date_str].append(os.path.join(root, name))

    # Hoisted: the match directory is loop-invariant; the original rebuilt
    # and re-created it inside the innermost pairing loop.
    match_dir = os.path.join(AOD_MATCH_DIR, "FY3D_5KM_MODIS_10KM")
    make_sure_path_exists(match_dir)

    for fy3d_date_str in fy3d_file_dict.keys():
        if fy3d_date_str not in modis_file_dict:
            continue
        for fy3d_file in fy3d_file_dict[fy3d_date_str]:
            for modis_file in modis_file_dict[fy3d_date_str]:
                out_file = get_out_file(fy3d_file, modis_file, match_dir)
                if os.path.isfile(out_file):
                    print('already exist {}'.format(out_file))
                    continue
                match_fy3d_5km_modis_10km(fy3d_file, modis_file, out_file,
                                          longitude_range=LONGITUDE_RANGE,
                                          latitude_range=LATITUDE_RANGE)
def combine_aqua_daily(datetime_start=None, datetime_end=None, data_dir=None,
                       out_dir=None):
    """Project AQUA/MODIS orbital AOD granules onto a daily 0.01-degree grid.

    Input files are grouped by day; each day's granules are projected onto a
    fixed lat/lon grid, summed and counted per cell, averaged via get_mean,
    and written out as one HDF5 file per day.

    :param datetime_start: datetime, inclusive lower bound on file dates (optional)
    :param datetime_end: datetime, inclusive upper bound on file dates (optional)
    :param data_dir: str, directory walked for input .hdf granules
    :param out_dir: str, directory receiving the daily combined HDF files
    """
    # FY3D_MERSI_ORBT_L2_AOD_MLT_NUL_20190916_0910_1000M_MS.HDF
    # FY3D_MERSI_GBAL_L2_AOD_MLT_GLL_20190705_POAD_5000M_MS.HDF
    print('data_dir :{}'.format(data_dir))
    file_dict = defaultdict(list)
    for root, dirs, files in os.walk(data_dir):
        for name in files:
            if name[-3:].lower() != 'hdf':
                continue
            date_str_file = name[10:17]  # YYYYJJJ (year + day-of-year)
            date_ = datetime.strptime(date_str_file, "%Y%j")
            if datetime_start is not None and datetime_end is not None:
                if not (datetime_start <= date_ <= datetime_end):
                    continue
            date_str = date_.strftime("%Y%m%d")
            file_dict[date_str].append(os.path.join(root, name))
    if not file_dict:
        print('没有找到数据')
        return

    # Build the projection lookup table
    print('创建投影查找表')
    res_degree = 0.01  # resolution, ~1km
    projstr = "+proj=longlat +ellps=WGS84 +datum=WGS84 +no_defs"
    # corner points sit at grid-cell centers
    proj = ProjCore(projstr, res_degree, unit="deg",
                    pt_tl=(69.995, 54.995),
                    pt_br=(139.995, 9.995))
    for d_, files in file_dict.items():
        # BUGFIX: `np.float` was deprecated in NumPy 1.20 and removed in 1.24;
        # use the builtin float (same dtype, float64).
        aod_sum = np.zeros((proj.row, proj.col), dtype=float)
        aod_count = np.zeros_like(aod_sum, dtype=float)
        filename_out = ('AQUA_MODIS_GBAL_L2_AOD_MLT_GLL_{}_POAD_1000M_MS.HDF'
                        .format(d_))
        file_out = os.path.join(out_dir, filename_out)
        if os.path.isfile(file_out):
            print('already exist {}'.format(file_out))
            continue
        print('<<< {}'.format(d_))
        for file_ in files:
            print('<<< {}'.format(file_))
            file_loader = AodModis(file_)
            aod = file_loader.get_aod()
            # BUGFIX: the None check must come before any use of `aod`;
            # the original called np.nanmin(aod) first, which raises on None.
            if aod is None:
                print('aod 为 None: {}'.format(file_))
                continue
            lons, lats = file_loader.get_lon_lat()
            print(np.nanmin(aod), np.nanmax(aod), np.nanmean(aod))
            # project lon/lat to grid indices
            print('投影')
            ii, jj = proj.lonslats2ij(lons, lats)
            valid = np.logical_and.reduce((
                ii >= 0, ii < proj.row,
                jj >= 0, jj < proj.col,
                aod > 0, aod < 1.5,
            ))
            if valid.sum() == 0:
                # BUGFIX: message said 'valid.size == 0' though the test is sum()
                print('valid.sum() == 0, continue')
                continue
            print('valid.sum() == {}'.format(valid.sum()))
            ii = ii[valid]
            jj = jj[valid]
            aod = aod[valid]
            # NOTE(review): fancy-index += does not accumulate duplicate
            # (ii, jj) pairs within one granule; if a granule can hit the same
            # cell twice, np.add.at would be needed — confirm before changing.
            aod_sum[ii, jj] += aod
            aod_count[ii, jj] += 1
            print(np.nanmin(aod), np.nanmax(aod), np.nanmean(aod))
            print(np.nanmin(aod_sum), np.nanmax(aod_sum), np.nanmean(aod_sum))
        if aod_sum is not None and aod_count is not None:
            aod_mean = get_mean(aod_sum, aod_count)
        else:
            continue
        # lon/lat of the new grid
        lons_grid, lats_grid = proj.grid_lonslats()
        make_sure_path_exists(out_dir)
        print((aod_mean != -999).sum())
        data_write = {
            'Optical_Depth_Land_And_Ocean': aod_mean,
            'Longitude': lons_grid,
            'Latitude': lats_grid
        }
        write_hdf5_and_compress(data_write, file_out)
def combine(frequency='Monthly', datetime_start=None, datetime_end=None,
            data_dir=None, out_dir=None, res_type='1KM', data_loader=None,
            satellite_sensor=None):
    """Aggregate gridded daily AOD files into Monthly/Seasonly/Yearly means.

    Groups input HDF files by period key, sums valid AOD per grid cell across
    the period via get_sum_count, averages with get_mean, and writes one HDF5
    file per period.

    :param frequency: str, 'Monthly' / 'Seasonly' / 'Yearly'
    :param datetime_start: datetime, inclusive lower bound on file dates (optional)
    :param datetime_end: datetime, inclusive upper bound on file dates (optional)
    :param data_dir: str, directory walked for input .hdf files
    :param out_dir: str, directory receiving the combined HDF files
    :param res_type: str, '1KM' or '5KM' (selects the resolution tag in names)
    :param data_loader: loader class; defaults to AodCombine
    :param satellite_sensor: str, prefix for the output filename
    :raises ValueError: if frequency is not one of the supported values
    """
    for label, value in (('frequency', frequency),
                         ('datetime_start', datetime_start),
                         ('datetime_end', datetime_end),
                         ('data_dir', data_dir),
                         ('out_dir', out_dir),
                         ('res_type', res_type)):
        print('{} === {}'.format(label, value))
    print('satellite_sensor === {}'.format(satellite_sensor))

    if data_loader is None:
        data_loader = AodCombine

    # dispatch table replacing the original if/elif chain
    period_key_for = {
        'Monthly': get_month,
        'Seasonly': get_season,
        'Yearly': get_year,
    }

    grouped = defaultdict(list)
    for root, dirs, files in os.walk(data_dir):
        for name in files:
            if name[-3:].lower() != 'hdf':
                continue
            in_file = os.path.join(root, name)
            date_ = data_loader(in_file).dt
            if datetime_start is not None and datetime_end is not None:
                if not (datetime_start <= date_ <= datetime_end):
                    continue
            if frequency not in period_key_for:
                raise ValueError(frequency)
            grouped[period_key_for[frequency](date_)].append(in_file)

    res_dict = {
        '1KM': '1000M',
        '5KM': '5000M',
    }
    for period, period_files in grouped.items():
        res = res_dict[res_type]
        file_out = os.path.join(
            out_dir,
            '{}_GBAL_L2_AOD_MLT_GLL_{}_POAD_{}_MS.HDF'.format(
                satellite_sensor, period, res))
        if os.path.isfile(file_out):
            print('already exist {}'.format(file_out))
            continue
        aod_sum = None
        aod_count = None
        lons = None
        lats = None
        for file_ in period_files:
            print('<<< {}'.format(file_))
            loader = data_loader(file_)
            aod = loader.get_aod()
            if aod is None:
                print('aod 为 None: {}'.format(file_))
                continue
            # mark out-of-range values as the -999 fill value
            in_range = np.logical_and(aod > 0, aod < 1.5)
            aod[~in_range] = -999
            aod_sum, aod_count = get_sum_count(aod_sum, aod_count, aod)
            if lons is None and lats is None:
                lons, lats = loader.get_lon_lat()
        if aod_sum is None or aod_count is None:
            continue
        aod_mean = get_mean(aod_sum, aod_count)
        make_sure_path_exists(out_dir)
        print((aod_mean != -999).sum())
        write_hdf5_and_compress(
            {
                'Optical_Depth_Land_And_Ocean': aod_mean,
                'Longitude': lons,
                'Latitude': lats
            },
            file_out)
def main(data_type, in_path, out_path, geo_path):
    """Plot AOD distribution maps (China and YRD views) for one file or a directory.

    :param data_type: str, one of 'FY3D_MERSI_1KM' / 'FY3D_MERSI_5KM' /
        'AQUA_MODIS_3KM' / 'AQUA_MODIS_10KM'
    :param in_path: str, a single HDF file or a directory walked for .hdf files
    :param out_path: str, directory receiving the .png plots
    :param geo_path: str or None, GEO file (if a file) or GEO directory
    :raises ValueError: if in_path is neither a file nor a directory
    """
    assert data_type in {
        'FY3D_MERSI_1KM', 'FY3D_MERSI_5KM', 'AQUA_MODIS_3KM', 'AQUA_MODIS_10KM'
    }, "{} 输入错误".format(data_type)

    if os.path.isfile(in_path):
        filelist = [in_path]
    elif os.path.isdir(in_path):
        filelist = [
            os.path.join(root, name)
            for root, dirs, files in os.walk(in_path)
            for name in files
            if name[-3:].lower() == 'hdf'
        ]
    else:
        raise ValueError('{} 不是文件夹也不是有效文件'.format(in_path))
    assert len(filelist) > 0, "输入文件的数量小于1"

    make_sure_path_exists(out_path)

    # loader dispatch replacing the original if/elif chain; the .get + raise
    # preserves the original's ValueError for an unknown type
    loader_by_type = {
        'FY3D_MERSI_1KM': AodFy3d1km,
        'FY3D_MERSI_5KM': AodFy3d5km,
        'AQUA_MODIS_3KM': AodModis,
        'AQUA_MODIS_10KM': AodModis,
    }
    data_loader = loader_by_type.get(data_type)
    if data_loader is None:
        raise ValueError(data_type)

    for in_file in filelist:
        if geo_path is None:
            loader = data_loader(in_file)
        elif os.path.isfile(geo_path):
            loader = data_loader(in_file, geo_file=geo_path)
        else:
            loader = data_loader(in_file, geo_path=geo_path)

        dt = loader.dt
        data = loader.get_aod()  # invalid values are -999
        lons, lats = loader.get_lon_lat()
        data, lons, lats = proj_china(data, lons, lats, 0, 1.5)
        if data_type in {"FY3D_MERSI_5KM"}:
            # presumably fills -999 gaps in the coarse 5KM grid; applied twice
            # on purpose in the original — TODO confirm with fill_points_2d docs
            fill_points_2d(data, -999)
            fill_points_2d(data, -999)

        # plotting constants shared by both area views (hoisted out of the loop)
        vmin = 0
        vmax = 1.5
        ticks = np.arange(0, 1.51, 0.3)
        mksize = 5

        for area_type in ("China", "YRD"):
            title = get_title(data_type, dt, area_type)
            out_file = os.path.join(
                out_path, loader.filename + '.{}.png'.format(area_type))
            nanhai = area_type == 'China'
            areas = get_areas(area_type)
            mask = get_province_mask(areas)
            valid = np.logical_and.reduce((data > vmin, data < vmax, mask))
            data_mask = data[valid]
            lons_mask = lons[valid]
            lats_mask = lats[valid]
            print('count == {}'.format(len(data_mask)))
            longitude_range, latitude_range = get_area_range(area_type)
            box = [
                latitude_range[1], latitude_range[0],
                longitude_range[0], longitude_range[1]
            ]
            plot_map_picture(data_mask, lons_mask, lats_mask, title=title,
                             vmin=vmin, vmax=vmax, areas=areas, box=box,
                             ticks=ticks, file_out=out_file, mksize=mksize,
                             nanhai=nanhai)