# East-Asia GOCI 6 km real-time-training setup (whitespace-mangled source,
# newlines restored).  Defines the data paths, loads the GOCI lon/lat grid,
# and begins the per-year loop that loads the KR station/weight tables.
# NOTE(review): `os` was used throughout but never imported in this chunk —
# added below.  `matlab` is the project helper module (Code.utils.matlab),
# presumably imported above/elsewhere — confirm.
import os

import pandas as pd
import matplotlib.pyplot as plt
import cartopy.crs as ccrs

### Setting path
data_base_dir = os.path.join('/data2', 'sehyun', 'Data')
path_grid_raw = os.path.join(data_base_dir, 'Raw', 'grid')
path_ea_goci = os.path.join(data_base_dir, 'Preprocessed_raw', 'EA_GOCI6km')
path_station = os.path.join(data_base_dir, 'Preprocessed_raw', 'Station')
path_stn_jp = os.path.join(path_station, 'Station_JP')
path_stn_cn = os.path.join(path_station, 'Station_CN')
path_stn_kr = os.path.join(path_station, 'Station_KR')
path_output = os.path.join(data_base_dir, 'output', 'RealTimeTraining',
                           'EastAsia')
matlab.check_make_dir(path_output)

tg = ['PM10', 'PM25']

## Load grid
mat = matlab.loadmat(os.path.join(path_grid_raw, 'grid_goci.mat'))
lon_goci, lat_goci = mat['lon_goci'], mat['lat_goci']
del mat

##
YEARS = [2016]
for yr in YEARS:
    # station-to-grid location/weight table (v201904 supersedes v2018)
    mat = matlab.loadmat(
        os.path.join(path_stn_kr, 'stn_GOCI6km_location_weight_v201904.mat'))
    dup_scode2_GOCI6km = mat['dup_scode2_GOCI6km']
    df = pd.DataFrame(mat['stn_GOCI6km_location'],
                      columns=mat['header_stn_GOCI6km_location'])
# --- whitespace-mangled fragment (original newlines lost; kept verbatim) ---
# Starts mid-call: the leading `mask=maskarr, transform=masksrc.transform)`
# closes a rasterio `features.shapes(...)` call begun above this chunk, and
# the chunk ends part-way through a `reproject(...)` argument list.
# What the visible code does: collects the mask polygons (the
# json.dumps/json.loads round-trip deep-copies each GeoJSON-like geometry
# dict), then for every 2016 projected MYD13A2 GeoTIFF writes a temp file
# on the mask grid and reprojects each band into it
# (output: 03mask_SouthKorea_MYD13A2/<yr>/m_*.tif).
# NOTE(review): `resolution = 1.02308446206551E-02` is assigned but not
# visibly used inside this chunk — presumably consumed below; confirm.
mask=maskarr, transform=masksrc.transform): shapes.append(json.loads(json.dumps(geometry))) YEARS = [2016] for yr in YEARS: print(yr) flist = glob.glob( os.path.join(path_myd_processed, '02prj_GCS_WGS84', str(yr), "*.tif")) flist.sort() for src_dataset in flist: tStart = time.time() dst_dataset = os.path.join(path_myd_processed, '03mask_SouthKorea_MYD13A2', str(yr), f'm_{os.path.basename(src_dataset)[2:]}') matlab.check_make_dir(os.path.dirname(dst_dataset)) with rio.open(src_dataset) as src: kwargs = src.meta.copy() kwargs['transform'] = masksrc.transform temp_dataset = os.path.join(os.path.dirname(dst_dataset), 'temp.tif') resolution = 1.02308446206551E-02 dst_crs = masksrc.crs with rio.open(temp_dataset, 'w+', **kwargs) as temp_dst: for i in range(1, src.count + 1): reproject( source=rio.band(src, i), destination=rio.band(temp_dst, i), src_transform=src.transform, src_crs=src.crs,
if t==3: name = os.path.basenanme(cal_list[c])[36:70] elif t==4: name = os.path.basenanme(cal_list[c])[32:66] else: name = os.path.basenanme(cal_list[c])[31:65] # Validation reslut pred_val = rf_model.predict(val.drop([target[i]]), val[target[i]]) pred_val = np.exp(pre_val) # save vali pred fname = f'rf_{name}_val_ranger.csv' pred_val = pd.DataFrame(pred_val) matlab.check_make_dir(os.path.join(paht_loo, type_list[t],"/RF/",target[i])) pred_val.to_csv(os.path.join(paht_loo, type_list[t],"/RF/",target[i], fname), sep=",") print('Predicted val result is saved') except: pass # LOO list print (doy) except: pass # doy print (yr) # yr print (target[i]) # target print (type_list[t]) # type
# Mosaic fourteen MYD13A2 tiles of one compositing period (DOY) into a
# single GeoTIFF via gdal_translate + gdal_merge.py (whitespace-mangled
# source, newlines restored).  Fragment of a larger loop: `flist`, `k`,
# `yr`, `tmpdirname`, the path_* variables, `gdal` and `matlab` are all
# defined above this chunk.
flist_temp = flist[k:k + 14]
doy = os.path.basename(flist_temp[0])[13:16]

# Extract the first subdataset of every HDF tile into the temporary
# directory as a plain GeoTIFF (file names suggest the NDVI layer).
input_files = []
for m, tile in enumerate(flist_temp, start=1):
    dst_dataset = os.path.join(tmpdirname.name, f"NDVI_{doy}_{m}.tif")
    print(os.path.join(path_myd_raw, tile))
    gdal_dataset = gdal.Open(os.path.join(path_myd_raw, tile))
    subdataset = gdal_dataset.GetSubDatasets()[0][0]
    subprocess.call(["gdal_translate", subdataset, dst_dataset])
    input_files.append(dst_dataset)

tStart = time.time()
dst_fname = os.path.join(path_myd_processed, '01mosaic', str(yr),
                         f"MYD13A2_{yr}_{doy}.tif")
matlab.check_make_dir(os.path.dirname(dst_fname))  # debugging

# gdal_merge: map the MODIS fill value -3000 to -9999, write LZW Int16.
pixel_type = 'Int16'
in_nodata_val = "-3000"
out_nodata_val = "-9999"
compression = "COMPRESS=LZW"
cmd = ["gdal_merge.py", "-n", in_nodata_val, "-a_nodata", out_nodata_val,
       "-ot", pixel_type, "-co", compression, "-o", dst_fname] + input_files
subprocess.call(cmd)
tmpdirname.cleanup()

tElapsed = time.time() - tStart
print(f'time taken : {tElapsed}')
print(os.path.basename(dst_fname))
print(doy)
# --- whitespace-mangled fragment (original newlines lost; kept verbatim;
#     ends mid-statement at `kwargs.update({`) ---
# Builds a validity mask from the mask raster (pixels != 255), turns the
# masked regions into GeoJSON-like shapes (json.dumps/json.loads is a
# deep-copy idiom), then loops over the MCD12Q1 mosaics to reproject them
# to EPSG:4326 at 5.11542231032757E-03 deg (stated to match the mask-file
# resolution) and to mask them to N50W110S20E150.
# NOTE(review): `last_num = os.path.basename(src_dataset)[-8:]` assumes
# file names end in `<yyyy>.tif` (e.g. "2016.tif") — confirm upstream
# naming before reuse.
band = masksrc.read(1) maskarr = (band!=255) shapes = [] for geometry, raster_value in features.shapes(band, mask=maskarr, transform=masksrc.transform): shapes.append(json.loads(json.dumps(geometry))) flist = glob.glob(os.path.join(path_mcd_processed, '01mosaic', "*.tif")) flist.sort() for src_dataset in flist: tStart = time.time() last_num = os.path.basename(src_dataset)[-8:] # b 2016.tif print (src_dataset) matlab.check_make_dir(os.path.join(path_mcd_processed, '02prj_GCS_WGS84')) # debugging matlab.check_make_dir(os.path.join(path_mcd_processed, '03masked_N50W110S20E150')) # debugging dst_dataset02 = os.path.join(path_mcd_processed, '02prj_GCS_WGS84', f'GCS_EA_MCD12Q1_{last_num}') # c dst_dataset03 = os.path.join(path_mcd_processed, '03masked_N50W110S20E150', f'm_MODIS_LC_500m_{last_num}') # d dst_crs = 'EPSG:4326' resolution = 5.11542231032757E-03 # same with maskfile resolution with rio.open(src_dataset) as src: transform, width, height = calculate_default_transform( src.crs, dst_crs, src.width, src.height, *src.bounds, resolution=resolution) kwargs = src.meta.copy() kwargs.update({
# --- whitespace-mangled fragment (original newlines lost; kept verbatim;
#     ends mid-call in the second `matlab.loadmat(` invocation) ---
# Script header for the Korea cases preprocessing: puts the project root
# on sys.path so `Code.utils.matlab` imports, defines every data path used
# below, and loads the Korea lat/lon grid from grid_korea.mat.
# NOTE(review): `os`, `sys` and `project_path` are used here but not
# defined/imported in this chunk — presumably set above; confirm.
sys.path.insert(0, project_path) from Code.utils import matlab import numpy as np import glob import time import pandas as pd ### Setting path data_base_dir = os.path.join('/data2', 'sehyun', 'Data') path_grid_raw = os.path.join(data_base_dir, 'Raw', 'grid') path_ea_goci = os.path.join(data_base_dir, 'Preprocessed_raw', 'EA_GOCI6km') path_korea_cases = os.path.join(data_base_dir, 'Preprocessed_raw', 'Korea', 'cases') matlab.check_make_dir(path_korea_cases) path_station = os.path.join(data_base_dir, 'Preprocessed_raw', 'Station') path_stn_jp = os.path.join(path_station, 'Station_JP') path_stn_cn = os.path.join(path_station, 'Station_CN') path_stn_kr = os.path.join(path_station, 'Station_KR') tg = ['PM10', 'PM25'] ## Load grid mat = matlab.loadmat(os.path.join(path_grid_raw, 'grid_korea.mat')) lat_kor, lon_kor = mat['lat_kor'], mat['lon_kor'] del mat mat = matlab.loadmat( os.path.join(path_stn_kr,
# --- whitespace-mangled fragment (original newlines lost; kept verbatim;
#     the per-file / per-year loop dedent points are not recoverable from
#     this collapsed text, so the code is left untouched) ---
# Tail of the OMNO2d year loop plus the head of the OMSO2e loop.  Per
# file: derive day-of-year from characters [19:23]+[24:28] of the file
# name, read the cloud-screened tropospheric NO2 column from the HE5
# grid, transpose to 720x1440, NaN-out the fill value (<= -1.2676506e+30),
# scale by 3.7216e-17 (presumably molecules/cm^2 -> Dobson Units, as the
# `..._DU.mat` output name suggests — confirm against OMI docs), and store
# it as column doy-1 of data_yr.  After the year's files, NaNs become
# -9999 and data_yr is saved to OMNO2d_trop_CS_<yr>_DU.mat.
doy = matlab.datenum(temp[19:23] + temp[24:28]) - doy_000 print(f'Reading OMNO2d {yr}_{doy:03d}') data = matlab.h5read( read_fname, '/HDFEOS/GRIDS/ColumnAmountNO2/Data Fields/ColumnAmountNO2TropCloudScreened' ) data = np.float64(data.T) # 720X1440 data[ data <= -1.2676506e+30] = np.nan # Assign NaN value to pixel that is out of valid range data = data * 3.7216e-17 data_yr[:, doy - 1] = data.ravel(order='F') out_fname = os.path.join(path_write, f'OMNO2d_trop_CS_{yr}_DU.mat') matlab.check_make_dir(os.path.dirname(out_fname)) data_yr[np.isnan(data_yr)] = -9999 matlab.savemat(out_fname, {'data_yr': data_yr}) tElapsed = time.time() - tStart print(f'{tElapsed} second') del data, data_yr print('==========================================================') ### OMSO2e print('OMSO2e') for yr in YEARS: tStart = time.time() doy_000 = matlab.datenum(f'{yr}0000') file_list = glob.glob( os.path.join(path_read, 'L3_grid', 'OMSO2e', str(yr), '*.he5')) file_list.sort()
# Reproject each year's MYD13A2 mosaics to geographic WGS-84
# (whitespace-mangled source, newlines restored).  `os`, `glob`, `time`,
# `rio`, `calculate_default_transform` and `matlab` are imported above
# this chunk.
### Setting path
data_base_dir = os.path.join('/data2', 'sehyun', 'Data')
path_myd_processed = os.path.join(data_base_dir, 'Preprocessed_raw', 'MODIS',
                                  'MYD13A2')

YEARS = [2016]
for yr in YEARS:
    path_read = os.path.join(path_myd_processed, '01mosaic', str(yr))
    flist = sorted(glob.glob(os.path.join(path_read, "*.tif")))
    for src_dataset in flist:
        tStart = time.time()
        matlab.check_make_dir(
            os.path.join(path_myd_processed, '02prj_GCS_WGS84',
                         str(yr)))  # debugging
        dst_dataset02 = os.path.join(
            path_myd_processed, '02prj_GCS_WGS84', str(yr),
            f'p_{os.path.basename(src_dataset)}')  # c
        dst_crs = 'EPSG:4326'
        resolution = 1.02308446206551E-02  # same with maskfile resolution
        with rio.open(src_dataset) as src:
            transform, width, height = calculate_default_transform(
                src.crs, dst_crs, src.width, src.height, *src.bounds,
                resolution=resolution)
# Real-time-training prediction fragment (whitespace-mangled source,
# newlines restored; the chunk has no control flow, so flat statement
# order is the unique reconstruction).  `pred`, `rf_model`, `features`,
# `target`, `i`, `yr`, `doy`, `utc`, `type_list`, `t`, `path_rtt` and
# `matlab` are defined above this chunk.
pred = pred.loc[:, features + [target[i]]]
pred.fillna(-9999, inplace=True)
name = f"{target[i]}_RTT_EA6km_{yr}_{doy:03d}_{utc:02d}"

# Prediction result — the model output is exponentiated, so it was
# presumably fitted on log-concentrations; confirm against training code.
pred_cases = rf_model.predict(pred[features].values)
pred_cases = np.exp(pred_cases)

# save pred prediction
fname = f"rf_{name}.csv"
pred_cases = pd.DataFrame(pred_cases)
out_dir = os.path.join(path_rtt, type_list[t], "RF_pred", target[i])
matlab.check_make_dir(out_dir)
pred_cases.to_csv(os.path.join(out_dir, fname), sep=",")
# print('Predicted prediction result is saved')

features = [
    'AOD', 'AE', 'FMF', 'SSA', 'NDVI', 'RSDN', 'Precip', 'DEM', 'LCurban',
    'Temp', 'Dew', 'RH', 'P_srf', 'MaxWS', 'PBLH', 'Visibility',
    'stack1_maxWS', 'stack3_maxWS', 'stack5_maxWS', 'stack7_maxWS', 'DOY',
    'PopDens', 'RoadDens'
]
# Validation result
# Reclassify the masked MCD12Q1 land-cover raster (17 IGBP classes plus
# nodata) into 10 grouped classes (whitespace-mangled source, newlines
# restored).  `os`, `time`, `rio` and `matlab` are imported above this
# chunk.
data_base_dir = os.path.join('/data2', 'sehyun', 'Data')
path_mcd_processed = os.path.join(data_base_dir, 'Preprocessed_raw', 'MODIS',
                                  'MCD12Q1')
# Target classes, in output-value order 1..10.
class_name = [
    "forest", "shrub", "savannas", "grass", "wetland", "crop", "urban",
    "snow", "barren", "water"
]

YEARS = [2016]
for yr in YEARS:
    tStart = time.time()
    src_dataset = os.path.join(path_mcd_processed, '03masked_N50W110S20E150',
                               f'm_MODIS_LC_500m_{yr}.tif')
    dst_dataset = os.path.join(path_mcd_processed, '01_reclassified',
                               f'reclass_MODIS_LC_500m_EA_{yr}.tif')
    matlab.check_make_dir(os.path.dirname(dst_dataset))  # Debugging
    matlab.check_make_dir(
        os.path.join(path_mcd_processed, '02_LC_binary', str(yr)))  # Debugging
    with rio.open(src_dataset) as src:
        band = src.read(1).copy()
        # Map original class values to the 10 grouped values; the later
        # equality masks (>= 10) cannot collide with values already
        # rewritten to 1-3 above.
        band[(band >= 1) & (band < 6)] = 1
        band[(band >= 6) & (band < 8)] = 2
        band[(band >= 8) & (band < 10)] = 3
        band[band == 10] = 4
        band[band == 11] = 5
        band[(band == 12) | (band == 14)] = 6
        band[band == 13] = 7
        band[band == 15] = 8
        band[band == 16] = 9
        band[(band == 17) | (band == src.meta['nodata'])] = 10
# Script header for the real-time-training NOx/O3 Korea preprocessing
# (whitespace-mangled source, newlines restored): imports plus every data
# path, target and year constant used below.
# NOTE(review): `os` is used by every path below but was not imported in
# this chunk — added (harmless if already imported above).  `matlab` is
# the project helper module, presumably imported above — confirm.
import os

import scipy.io as sio
import numpy as np
import glob
import time
import pandas as pd

### Setting path
data_base_dir = os.path.join('/data2', 'sehyun', 'Data')
path_grid_raw = os.path.join(data_base_dir, 'Raw', 'grid')
path_ea_goci = os.path.join(data_base_dir, 'Preprocessed_raw', 'EA_GOCI6km')
path_rtt = os.path.join(data_base_dir, 'Preprocessed_raw', 'RTT')  # path_save
path_nox_korea = os.path.join(data_base_dir, 'Preprocessed_raw', 'NOX03',
                              'Korea')
matlab.check_make_dir(path_nox_korea)
path_korea_cases = os.path.join(data_base_dir, 'Preprocessed_raw', 'Korea',
                                'cases')
path_station = os.path.join(data_base_dir, 'Preprocessed_raw', 'Station')
path_stn_jp = os.path.join(path_station, 'Station_JP')
path_stn_cn = os.path.join(path_station, 'Station_CN')
path_stn_kr = os.path.join(path_station, 'Station_KR')
path_data = '/share/irisnas5/Data/'
path = '/share/irisnas5/GEMS/PM/00_EA6km/'

target = ['PM10', 'PM25']
type_list = ['conc', 'time', 'time_conc']
YEARS = [2016]

## Load grid
# --- whitespace-mangled fragment (original newlines lost; kept verbatim;
#     the extent of the `if (accuracy[0, 3] < max_accuracy):` body is not
#     recoverable from the collapsed text, so the code is left untouched) ---
# RF hyper-parameter search bookkeeping: predicts on plots_pred,
# exponentiates the output, tracks the best split setting `ss`, and
# persists feature importances, the pickled model, and get_params() to
# the RTT output directories.
# NOTE(review): `max_accuracy` is updated when accuracy[0, 3] is SMALLER,
# so it actually tracks a minimum (an error metric, presumably) — the
# name is misleading; consider renaming.
# NOTE(review): both "RF/" (trailing slash) and "RF" are used as the same
# os.path.join component — tolerated, but normalize to "RF".
# NOTE(review): confirm whether the importance/model saving was meant to
# run only when the improvement test fires.
print(accuracy[0, 1]) pred_pred = rf_model.predict(plots_pred[features]) pred_pred = np.exp(pred_pred) if (accuracy[0, 3] < max_accuracy): max_accuracy = accuracy[0, 3] ss = s print(rf_model.feature_importances_) name = f"{target[i]}_dataset" # save variable importance feature_imp = pd.Series( rf_model.feature_importances_).sort_values(ascending=False) fname = f"rf_{name}_imp_ranger.csv" matlab.check_make_dir(os.path.join(path_rtt, "RF/", target[i])) feature_imp.to_csv(os.path.join(path_rtt, "RF/", target[i], fname), sep=",") fname = f"rf_{name}_model_ranger.pickle" with open(os.path.join(path_rtt, "RF", target[i], fname), 'wb') as f: pickle.dump(rf_model, f) print(f'RF model is saved with {ss} of num.random.splits') parameter = rf_model.get_params( ) #f'RF model is saved with {ss} of num.random.splits' pd.DataFrame(parameter).to_csv(os.path.join( path_rtt, "RF/", target[i], f"rf_{name}_parameter_ranger.csv"), sep=",")
# Mosaic one year's fourteen MCD12Q1 land-cover tiles into a single
# East-Asia GeoTIFF (whitespace-mangled source, newlines restored).
# Fragment of a larger loop: `flist`, `k`, `tmpdirname`, the path_*
# variables, `gdal` and `matlab` are defined above this chunk.
flist_temp = flist[k:k + 14]
yr = os.path.basename(flist_temp[0])[9:13]

# Extract the first subdataset of each HDF tile (presumably the LC_Type
# layer — confirm) into the temp dir as a plain GeoTIFF.
input_files = []
for m, fname in enumerate(flist_temp, start=1):
    dst_dataset = os.path.join(tmpdirname.name, f"LC_{yr}_{m}.tif")
    gdal_dataset = gdal.Open(os.path.join(path_mcd_raw, fname))
    src_dataset = gdal_dataset.GetSubDatasets()[0][0]
    subprocess.call(["gdal_translate", src_dataset, dst_dataset])
    input_files.append(dst_dataset)

# Mosaic
tStart = time.time()
matlab.check_make_dir(os.path.join(path_mcd_processed,
                                   '01mosaic'))  # debugging
dst_fname = os.path.join(path_mcd_processed, '01mosaic',
                         f"EA_MCD12Q1_mosaic_{yr}.tif")
# gdal_merge: map fill value 255 to -9999, write LZW-compressed Int16.
pixel_type = 'Int16'
in_nodata_val = "255"
out_nodata_val = "-9999"
compression = "COMPRESS=LZW"
cmd = ["gdal_merge.py", "-n", in_nodata_val, "-a_nodata", out_nodata_val,
       "-ot", pixel_type, "-co", compression, "-o", dst_fname] + input_files
subprocess.call(cmd)
tmpdirname.cleanup()

tElapsed = time.time() - tStart
print(f'time taken : {tElapsed}')
# --- whitespace-mangled fragment (original newlines lost; kept verbatim;
#     ends mid-call inside the final `matlab.savemat(` invocation) ---
# Fills the trailing feature columns of the per-scene cases matrix:
# 32-33 wind cos/sin, 34 AP3h, 35 seasonal DOY term
# sin((doy-112)*2*pi/365.25), 36 population density, 37 road density,
# 38:52 EA emissions, 52:62 land-cover ratios, 62 OMNO2d tropospheric
# NO2.  NaNs become -9999, then the scene is written to CSV and .mat.
# Korean comment `BESS 없음` = "no BESS (data unavailable)": for yr >= 2018
# column 9 is zeroed and its header entry dropped from header_temp.
# NOTE(review): for yr >= 2018 header_temp has one fewer entry than the
# unchanged column count of `data` — pd.DataFrame(data, columns=...)
# would then mismatch; confirm upstream shape handling.
data[:,32] = Wcos.ravel(order='F') data[:,33] = Wsin.ravel(order='F') data[:,34] = AP3h.ravel(order='F') # ancillary data data[:,35] = np.sin((doy-112)*2*np.pi/365.25) # DOY data[:,36] = popDens.ravel(order='F') # Population Density data[:,37] = roadDens.ravel(order='F') # Road Density data[:,38:52]=EA_emis data[:,52:62]=LC_ratio # additional variables data[:,62]=omno2d_trop.ravel(order='F') data[np.isnan(data)] = -9999 matlab.check_make_dir(os.path.join(path_ea_goci, 'cases_csv', str(yr))) if yr>=2018: # BESS 없음 data[:,9]= 0 header_temp = header[:9]+header[10:] else: header_temp = header print ('data shape :', data.shape) tmp_df = pd.DataFrame(data, columns=header_temp) tmp_df.to_csv(os.path.join(path_ea_goci, 'cases_csv',str(yr),f'cases_EA6km_{yr}_{doy:03d}_{utc:02d}.csv')) del tmp_df data[data==-9999] = np.nan data_tbl = pd.DataFrame(data,columns=header_temp) print (data_tbl.to_dict('list').keys()) header_temp = np.array(data_tbl.columns, dtype=h5py.string_dtype(encoding='utf-8')) matlab.savemat(os.path.join(path_ea_goci, 'cases_mat', str(yr), f'cases_EA6km_{yr}_{doy:03d}_{utc:02d}.mat'),