path_omi_processed = os.path.join(data_base_dir, 'Preprocessed_raw', 'OMI') #data_base_dir = os.path.join(project_path, 'Data') #path_read = os.path.join(data_base_dir, 'Raw', 'OMI') #path_write = os.path.join(data_base_dir, 'Preprocessed_raw', 'OMI') #data_base_dir = os.path.join('/', 'share', 'irisnas5', 'GEMS', 'GEMS_python') #path_read = os.path.join('/','share','irisnas6','Data','OMI','00raw') #path_write = os.path.join(data_base_dir, 'Preprocessed_raw', 'OMI_tempConv') ### Setting period YEARS = [2016] ### OMNO2d print('OMNO2d') for yr in YEARS: tStart = time.time() doy_000 = matlab.datenum(f'{yr}0000') file_list = glob.glob( os.path.join(path_read, 'L3_grid', 'OMNO2d', str(yr), '*.he5')) file_list.sort() if yr % 4 == 0: days = 366 else: days = 365 data_yr = np.ones((1036800, days)) * np.nan for read_fname in file_list: temp = os.path.basename(read_fname) doy = matlab.datenum(temp[19:23] + temp[24:28]) - doy_000 print(f'Reading OMNO2d {yr}_{doy:03d}') data = matlab.h5read( read_fname, '/HDFEOS/GRIDS/ColumnAmountNO2/Data Fields/ColumnAmountNO2TropCloudScreened'
import numpy as np
import glob
import time

### Setting path
data_base_dir = os.path.join('/data2', 'sehyun', 'Data')
path_raw_goci = os.path.join(data_base_dir, 'Raw', 'GOCI_AOD')
path_goci_aod = os.path.join(data_base_dir, 'Preprocessed_raw', 'GOCI_AOD')

### Setting period
YEARS = [2016]  #, 2018, 2019]
MONTHS = range(1, 13)

# Walk the raw GOCI HDF files year by year and month by month; for each file
# derive day-of-year and UTC hour from the file name and read the AOD, FMF
# and SSA datasets as float64.
for yr in YEARS:
    doy_000 = matlab.datenum(f'{yr}0000')
    for mm in MONTHS:
        month_dir = os.path.join(path_raw_goci, str(yr), f'{mm:02d}')
        file_list = sorted(glob.glob(os.path.join(month_dir, '*.hdf')))
        for fname in file_list:
            tStart = time.time()
            base_name = os.path.basename(fname)
            # Characters 23:31 of the file name hold the date string,
            # characters 31:33 the UTC hour.
            doy = matlab.datenum(base_name[23:31]) - doy_000
            utc = int(base_name[31:33])

            GOCI_aod = matlab.hdfread(
                fname, 'Aerosol_Optical_Depth_550nm').astype('float64')
            GOCI_fmf = matlab.hdfread(
                fname, 'Fine_Mode_Fraction_550nm').astype('float64')
            GOCI_ssa = matlab.hdfread(
                fname, 'Single_Scattering_Albedo_440nm').astype('float64')
import numpy as np
import glob
import copy
import time

### Setting path
data_base_dir = os.path.join('/data2', 'sehyun', 'Data')
path_gpm_raw = os.path.join(data_base_dir, 'Raw', 'GPM', '3IMERGHH')
path_gpm_processed = os.path.join(data_base_dir, 'Preprocessed_raw', 'GPM',
                                  'AP_24h_hourly')

YEARS = [2016]

# For each year, sum the first 48 files of the sorted list into one
# precipitation field for the first day.
for yr in YEARS:
    list_gpm = sorted(
        glob.glob(os.path.join(path_gpm_raw, str(yr), '*/*.HDF5')))
    doy0 = matlab.datenum(str(yr - 1) + '1231')

    # First day UTC 00
    list_temp = list_gpm[:48]
    size = (1800, 3600, 48)
    gpm = np.zeros(size)
    # Characters 21:29 of the first file name hold the date string.
    doy = matlab.datenum(os.path.basename(list_temp[0])[21:29]) - doy0 + 1
    print(f'doy: {doy}')

    # list_temp already holds at most 48 entries, so it is iterated directly.
    for j, fname in enumerate(list_temp):
        gpm_temp = np.float64(matlab.h5read(fname, '/Grid/precipitationCal'))
        # Values below -9999 are treated as missing.
        gpm_temp[gpm_temp < -9999] = np.nan
        gpm[:, :, j] += gpm_temp

    # NaN-ignoring sum over the 48 slices, then scaled by 0.5.
    precip = np.nansum(gpm, axis=2) * 0.5
file_list = glob.glob( os.path.join(path_in_situ, 'AirQuality_Japan', f'{yr}_soramame', f'{yr}{mm:02d}_00', '*.csv')) file_list.sort() stn_mm = None for fname in file_list: stn_tbl_temp = pd.read_csv(fname, encoding='latin1') stn_tbl_temp = stn_tbl_temp.values scode = stn_tbl_temp[:, 0] dstr = [str(x) for x in stn_tbl_temp[:, 1]] dstr = [f'{d[:4]}{d[5:7]}{d[8:]}' for d in dstr] # yyyy/mm/dd -> yyyymmdd dvec = [(int(d[:4]), int(d[4:6]), int(d[6:])) for d in dstr] dvec = np.array(dvec) data_datenum = [matlab.datenum(val) for val in dstr] doy_000 = matlab.datenum(f'{yr}00000') doy = np.array([val - doy_000 for val in data_datenum]) stn_value = stn_tbl_temp[:, 2:15] data = np.hstack([ doy.reshape(-1, 1), dvec, stn_value[:, [0, 1, 5, 6, 3, 10, 11]], scode.reshape(-1, 1) ]) # {'doy','yr','mon','day','KST','SO2','CO','OX','NO2','PM10','PM25','scode'} if stn_mm is None: stn_mm = data else: stn_mm = np.vstack([stn_mm, data]) matlab.savemat( os.path.join(path_stn_jp, 'stn_code_data',
f'*_{p}.txt')) file_list.sort() data = None for fname in file_list: data_temp = pd.read_csv(fname, encoding='latin1') if data is None: data = data_temp else: data = pd.concat([data, data_temp]) data = data.values if varname == 'PM25': #Need to check vv = data[:, 3] idx_PM25 = np.isin(vv, ['PM25']) data = data[idx_PM25, :] data = np.delete(data, [3, 4], axis=1) yrmonday = data[:, 0] * 10000 + data[:, 3] * 100 + data[:, 4] data_datenum = [matlab.datenum(str(val)) for val in yrmonday] doy_000 = matlab.datenum(f'{yr}00000') data_doy = np.array([val - doy_000 for val in data_datenum]) data_info = np.hstack([ data_doy.reshape(-1, 1), data[:, 0].reshape(-1, 1), data[:, 3].reshape(-1, 1), data[:, 4].reshape(-1, 1), data[:, 1:3] ]) # 'doy','year','month','day','scode','ccode' data_new = None for KST in range(1, 24 + 1): data_temp = data_info data_temp = np.hstack( [data_temp, np.full([data_temp.shape[0], 1], KST)]) #data_temp[:,6]=KST data_temp = np.hstack([data_temp, data[:, 4 + KST].reshape(-1, 1)]) if data_new is None: data_new = data_temp
'RDAPS') # revised ### Setting period YEARS = [2016] #, 2018, 2019 for yr in YEARS: #file_list = glob.glob(os.path.join(path_rdaps_raw, str(yr), '*000.*.gb2')) #file_list.sort() curr_path = os.path.join(rdaps_path, str(yr)) list_char = glob.glob(os.path.join(curr_path, '*000.*.gb2')) list_char = [os.path.basename(f) for f in list_char] list_char.sort() # revised list_date = [x[21:29] for x in list_char] list_dnum = [matlab.datenum(date) for date in list_date] doy_000 = matlab.datenum(f'{yr}0101') - 1 rdaps = np.full((419, 491, 18), np.nan) for i, fname in enumerate(file_list): tStart = time.time() print(fname) rdaps_data = pygrib.open(fname) data = rdaps_data.select(name='Temperature', typeOfLevel='heightAboveGround')[0].values rdaps[:, :, 0] = np.squeeze(data) data = rdaps_data.select(name='Dew point temperature', typeOfLevel='heightAboveGround')[0].values rdaps[:, :, 1] = np.squeeze(data)
df['PM10'] = tmp_df['stnSPM'] del tmp_df tmp_df = read_table_mat( os.path.join(path_stn_jp, 'byPollutant/', f'JP_stnPM25_{yr}.mat')) df['PM25'] = tmp_df['stnPM25'] del tmp_df print('Reading done !') aa = np.array(list(df.index)) # KST, scode, doy df['KST'] = aa[:, 0] df['scode'] = aa[:, 1] df['doy'] = aa[:, 2] doy000 = matlab.datenum(f'{yr}00000') date_list = dict() for x in range(1, days + 1): date_list[x] = matlab.datestr(doy000 + x) dates = df['doy'].apply(lambda x: date_list[x]) dates = pd.DatetimeIndex(dates) df['yr'] = dates.year df['mon'] = dates.month df['day'] = dates.day df.reset_index(drop=True, inplace=True) for col in cols: df.loc[df[col] >= 9997, col] = np.nan df.loc[df[col] == -9999, col] = np.nan df.dropna(axis=0, subset=cols, how='all',
#     'NR','PHEN','PROD2','SO2','SULF','PM10','PEC','PM2_5','PMFINE',
#     'PNO3','POA','PSO4','PMC','ISOP'];

## 27 km domain
nr = 128
nc = 174

YEARS = [2017]  # range(2017, 2019+1)
KNU_dir = 'KNU_27_01'

# For every day of each year, open that day's emission NetCDF file and
# allocate a NaN-filled (nr, nc, n_variables - 1, 24) array for it.
for yr in YEARS:
    tStart = time.time()
    # Divisible-by-4 leap-year rule (ignores the century exception).
    days = 366 if (yr % 4) == 0 else 365

    curr_path = os.path.join(emis_path, KNU_dir, str(yr))

    # Every datenum from Jan 1 to Dec 31, converted to text and stripped of
    # the two separator characters: '2016-01-01' format -> '20160101' format.
    first_dnum = matlab.datenum(f'{yr}0101')
    last_dnum = matlab.datenum(f'{yr}1231')
    list_date = []
    for dnum in range(first_dnum, last_dnum + 1):
        iso = str(matlab.datestr(dnum))
        list_date.append(iso[:4] + iso[5:7] + iso[8:])

    for i, date in enumerate(list_date):
        doy = i + 1
        fname = f'egts3d_l.{yr}.{date[4:8]}.{KNU_dir}.AQFv1.ncf'
        #try:
        day_dir = os.path.join(curr_path, f'NIER_09h_EMIS_{date}')
        ncfile = netcdf.NetCDFFile(os.path.join(day_dir, fname), 'r')
        var = list(ncfile.variables.keys())
        emiss_all = np.full((nr, nc, len(var) - 1, 24), np.nan)
        # From 01UTC to 00UTC (next day)
lon_goci, lat_goci = mat['lon_goci'], mat['lat_goci'] del mat mat = matlab.loadmat(os.path.join(path_grid_raw, 'grid_omi_25.mat')) lon_omi, lat_omi = mat['lon_omi'], mat['lat_omi'] del mat lon_omi = lon_omi[340:560, 1020:1320] lat_omi = lat_omi[340:560, 1020, 1320] points = np.array([lon_omi.ravel(order='F'), lat_omi.ravel(order='F')]).T del lon_omi, lat_omi print(f'points shape : {points.shape}') dlist = list( map(lambda x: x + matlab.datenum('20050000'), list(range(1, 5478 + 1)))) dvec = list(map(lambda x: matlab.datestr(x), dlist)) pname_list = ['OMNO2d', 'OMSO2e_m', 'OMDOAO3e_m', 'OMHCHOG'] for pname in pname_list: if pname == 'OMNO2d': data_conv = matlab.loadmat( os.path.join( path_omi_processed, f'tempConv_{pname}_trop_CS_sigma1_2005_2019.mat'))['data_conv'] elif pname == 'OMSO2e_m': data_conv = matlab.loadmat( os.path.join( path_omi_processed, f'tempConv_{pname[:6]}_sigma2_2005_2019.mat'))['data_conv'] elif pname == 'OMDOAO3e_m':
# Fix: the directory was spelled 'Prepreossed_raw'; the sibling preprocessing
# scripts write under 'Preprocessed_raw'.
# NOTE(review): confirm no existing data lives under the misspelled directory.
write_path = os.path.join(project_path, 'Data', 'Preprocessed_raw', 'GPM', 'AP_24h_hourly')

"""
name = '2014/3B-HHR.MS.MRG.3IMERG.20141231-S000000-E002959.0000.V04A.HDF5'
lat_gpm = matlab.h5read(os.path.join(raw_data_path, name), '/Grid/lat')
lon_gpm = matlab.h5read(os.path.join(raw_data_path, name), '/Grid/lon')
lat_gpm = np.float64(lat_gpm); lon_gpm = np.float64(lon_gpm)
lat_gpm, lon_gpm = np.meshgrid(lat_gpm, lon_gpm)
sio.savemat('grid_gpm.mat', mdict={'lon_gpm':lon_gpm, 'lat_gpm':lat_gpm})
"""

YEARS = [2014]

# For each year, sum the first 48 files into one precipitation field for the
# first day (UTC 00) and save it as a .mat file.
for yr in YEARS:
    os.chdir(raw_data_path)
    list_gpm = matlab.dir(str(yr), '.HDF5')  # list_gpm = dir([num2str(yr),'/*/*.HDF5']);
    doy_0 = matlab.datenum(str(yr-1)+'1231')

    # First day UTC 00
    list_temp = list_gpm[:48]
    # Characters 21:29 of the first entry hold the date string.
    doy = matlab.datenum(list_temp[0][21:29])-doy_0+1
    gpm = np.zeros([1800, 3600, 48])  # lat 1800, lon 3600
    for j in range(48):
        gpm_temp = matlab.h5read(list_gpm[j], '/Grid/precipitationCal')
        gpm_temp = np.float64(gpm_temp)
        # Values below -9999 are treated as missing.
        gpm_temp[gpm_temp<-9999] = np.nan
        gpm[:,:,j] = gpm_temp

    precip = np.nansum(gpm, axis=2)
    # The data are 30-minute files but the unit is per hour, so multiply by 0.5.
    precip = precip*0.5
    # Fix: zero-pad the day of year ('001'); `{doy:3d}` padded with spaces and
    # put blanks into the file name.
    sio.savemat(os.path.join(write_path, str(yr), f'gpm_AP_{yr}_{doy:03d}_UTC00.mat'),
                mdict={'precip':precip})

    # Fix: this line was a MATLAB-style '%' comment, a syntax error in Python.
    # For doy 001 UTC01 to doy 365(366) UTC23
fname=f'GOCI_Type_{filename}', data={'GOCI_type': GOCI_type}) matlab.savemat(dirname=os.path.join( path, 'No_of_Used_500m_Pixels_for_One_6km_Product_Pixel', str(yr)), fname=f'GOCI_num_used_pixels_{filename}', data={'GOCI_num_used_pixels': GOCI_num_used_pixels}) matlab.savemat(dirname=os.path.join(path, 'NDVI', str(yr)), fname=f'GOCI_NDVI_{filename}', data={'GOCI_ndvi': GOCI_ndvi}) matlab.savemat(dirname=os.path.join(path, 'DAI', str(yr)), fname=f'GOCI_DAI_{filename}', data={'GOCI_dai': GOCI_dai}) for yr in YEARS: doy_000 = matlab.datenum(f'{yr}0000') days = 366 if (yr % 4) == 0 else 365 num_utc = 8 all_list_doy = np.tile(range(1, days + 1), (num_utc, 1)) all_list_utc = np.tile(range(num_utc), (1, days)) all_date = np.vstack((all_list_doy.T.flatten(), all_list_utc.flatten())).T isNotDateExist = np.full(len(all_date), True) for mm in MONTHS: curr_path = os.path.join(raw_data_path, str(yr), f'{mm:02d}') #os.chdir(curr_path) list_mm = glob.glob(os.path.join(curr_path, '*.hdf')) list_mm = [os.path.basename(f) for f in list_mm]