def extract_data(delta=3):
    """
    Extract slp, wnd_u and wnd_v around every tide gauge and save them as csv.

    Walks the year folders under the predictor directory, reads each netCDF
    file with ``readnetcdf`` and, for every non-empty tide gauge file, subsets
    the three predictors to the grid points returned by
    ``findPixels``/``findindx``. Every subset is written to
    ``<csv_path>/<tide gauge>/<predictor>/<tide gauge>_<predictor>_<stamp>.csv``
    where ``<stamp>`` is the third dot-separated field of the netCDF file name.

    delta: distance (in degrees) from the tide gauge
    """
    print('Delta = {}'.format(delta), '\n')

    # defining the folders for predictors
    dir_in = "G:\\04_merra\\merraNetCDF"
    # backslashes are escaped like the other two paths: a bare "\d" or "\o"
    # is an invalid escape sequence
    surge_path = "D:\\data\\obs_surge"
    csv_path = "G:\\04_merra\\merraNewLocalized"

    # tide gauge files (one csv per gauge)
    tg_list = os.listdir(surge_path)

    # year folders of the predictor data
    years = os.listdir(dir_in)

    #################################
    # looping through the year folders
    #################################
    for yr in years:
        print(yr, '\n')
        yr_dir = os.path.join(dir_in, yr)

        ####################################
        # looping through the daily .nc files
        ####################################
        for dd in os.listdir(yr_dir):
            # readnetcdf is handed a bare file name, so it has to run from
            # the folder that holds the file
            os.chdir(yr_dir)
            print(dd, '\n')

            #########################################
            # get netcdf components - predictor file
            #########################################
            nc_file = readnetcdf(dd)
            lon, lat, nc_time = nc_file[0], nc_file[1], nc_file[2]
            predictors = {
                'slp': nc_file[3],
                'wnd_u': nc_file[4],
                'wnd_v': nc_file[5],
            }

            # looping through individual tide gauges
            for tg in tg_list:
                print(tg, '\n')

                surge_file = os.path.join(surge_path, tg)
                if os.stat(surge_file).st_size == 0:
                    print('\n', "This tide gauge has no surge data!", '\n')
                    continue

                # the first row of the surge csv holds the gauge lon and lat
                surge = pd.read_csv(surge_file, header=None)
                tg_cord = Coordinate(surge.iloc[0, 0], surge.iloc[0, 1])

                # find closest grid points and their indices
                close_grids = findPixels(tg_cord, delta, lon, lat)
                ind_grids = findindx(close_grids, lon, lat)

                # names used for the output folders and file
                tg_name = tg.split('.mat.mat.csv')[0]
                nc_stamp = dd.split('.')[2]

                # subset every predictor on the selected grid points
                for pred_name, pred in predictors.items():
                    start_time = tt.time()
                    pred_new = subsetter(dd, pred, ind_grids, nc_time)
                    print("--- %s seconds ---" % (tt.time() - start_time))

                    # <csv_path>/<tide gauge>/<predictor>, created on demand
                    out_dir = os.path.join(csv_path, tg_name, pred_name)
                    os.makedirs(out_dir, exist_ok=True)

                    # save as csv
                    save_name = '_'.join([tg_name, pred_name, nc_stamp]) \
                        + ".csv"
                    pred_new.to_csv(os.path.join(out_dir, save_name))
def _save_collected(csv_path, tg_list, collected):
    """Write the frames gathered so far as one csv per gauge and predictor."""
    for tg_index, frames_by_pred in collected.items():
        tg_name_old = tg_list[tg_index].split('.mat.mat.csv')[0]
        tg_name = '-'.join([str(tg_index), tg_name_old])

        out_dir = os.path.join(csv_path, tg_name)
        os.makedirs(out_dir, exist_ok=True)

        for pred_name, frames in frames_by_pred.items():
            merged = pd.concat(frames, axis=0)
            merged.to_csv(os.path.join(out_dir, pred_name + '.csv'))


def extract_data(delta=3, tg_start=376, tg_end=377):
    """
    Extract slp, wnd_u and wnd_v for a range of tide gauges, one csv each.

    Walks the year folders under the predictor directory, reads each netCDF
    file with ``readnetcdf`` and subsets the three predictors to the grid
    points returned by ``findPixels``/``findindx`` for every selected,
    non-empty tide gauge file. The subsets of all netCDF files are stacked
    row-wise per gauge and predictor and written to
    ``<csv_path>/<index>-<tide gauge>/{slp,wnd_u,wnd_v}.csv``. The files are
    rewritten after every year folder, so an interrupted run keeps the years
    finished so far.

    delta: distance (in degrees) from the tide gauge
    tg_start, tg_end: half-open index range into the listing of the tide
        gauge folder (``os.listdir`` order); the defaults select gauge 376

    Raises IndexError when the range does not fit the tide gauge listing.
    """
    print('Delta = {}'.format(delta), '\n')

    # defining the folders for predictors
    dir_in = "/lustre/fs0/home/mtadesse/MERRAv2/data"
    surge_path = "/lustre/fs0/home/mtadesse/obs_surge"
    csv_path = "/lustre/fs0/home/mtadesse/merraLocalized"

    # tide gauge files (one csv per gauge)
    tg_list = os.listdir(surge_path)

    # year folders of the predictor data
    years = os.listdir(dir_in)

    # fail before any netCDF file is read instead of deep inside the loops
    if tg_start < 0 or tg_end > len(tg_list):
        raise IndexError(
            'tide gauge range [{}, {}) does not fit the {} files in {}'
            .format(tg_start, tg_end, len(tg_list), surge_path))

    # collected[gauge index][predictor name] -> list of per-file frames;
    # kept per gauge so several gauges never end up in the same csv
    collected = {}

    #################################
    # looping through the year folders
    #################################
    for yr in years:
        yr_dir = os.path.join(dir_in, yr)

        ####################################
        # looping through the daily .nc files
        ####################################
        for dd in os.listdir(yr_dir):
            # readnetcdf is handed a bare file name, so it has to run from
            # the folder that holds the file
            os.chdir(yr_dir)
            print(dd, '\n')

            #########################################
            # get netcdf components - predictor file
            #########################################
            nc_file = readnetcdf(dd)
            lon, lat, nc_time = nc_file[0], nc_file[1], nc_file[2]
            predictors = {
                'slp': nc_file[3],
                'wnd_u': nc_file[4],
                'wnd_v': nc_file[5],
            }

            # looping through individual tide gauges
            for t in range(tg_start, tg_end):
                tg = tg_list[t]

                surge_file = os.path.join(surge_path, tg)
                if os.stat(surge_file).st_size == 0:
                    print('\n', "This tide gauge has no surge data!", '\n')
                    continue

                # the first row of the surge csv holds the gauge lon and lat
                surge = pd.read_csv(surge_file, header=None)
                tg_cord = Coordinate(surge.iloc[0, 0], surge.iloc[0, 1])

                # find closest grid points and their indices
                close_grids = findPixels(tg_cord, delta, lon, lat)
                ind_grids = findindx(close_grids, lon, lat)

                # subset every predictor and queue it for this gauge; the
                # frames are concatenated once at write time rather than
                # after every file
                frames_by_pred = collected.setdefault(t, {})
                for pred_name, pred in predictors.items():
                    pred_new = subsetter(dd, pred, ind_grids, nc_time)
                    frames_by_pred.setdefault(pred_name, []).append(pred_new)

        # checkpoint: refresh the csv files once the year is complete
        _save_collected(csv_path, tg_list, collected)
def _merge_into_combined(out_dir, pred_name):
    """Fold every per-file csv in out_dir into <pred_name>.csv, then delete them."""
    combined_name = '.'.join([pred_name, 'csv'])
    combined_path = os.path.join(out_dir, combined_name)

    # per-file csv files waiting to be merged, in name order so the rows of
    # several pending files are appended in a reproducible order
    pattern = os.path.join(glob.escape(out_dir), '*.csv')
    pending = sorted(f for f in glob.glob(pattern)
                     if os.path.basename(f) != combined_name)
    if not pending:
        return

    # the rows merged earlier always come first, the new ones after them
    sources = pending
    if os.path.exists(combined_path):
        sources = [combined_path] + pending

    combined_csv = pd.concat([pd.read_csv(f) for f in sources])
    combined_csv.to_csv(combined_path, index=False, encoding='utf-8-sig')

    # delete exactly the files that were merged
    for merged_file in pending:
        os.remove(merged_file)


def extract_data(delta=3):
    """
    Extract slp, wnd_u and wnd_v for a range of tide gauges, merging on the fly.

    Walks the year folders under the predictor directory, reads each netCDF
    file with ``readnetcdf`` and subsets the three predictors to the grid
    points returned by ``findPixels``/``findindx`` for every selected,
    non-empty tide gauge file. Each subset is first written as its own csv in
    ``<csv_path>/<index>-<tide gauge>/<predictor>/`` and then immediately
    merged into ``<predictor>.csv`` in that folder, after which the per-file
    csv is removed.

    The tide gauges are ``tg_list[startVal:endVal]``; ``startVal`` and
    ``endVal`` are not defined in this function and are looked up as globals
    (NOTE(review): confirm where they are set).

    delta: distance (in degrees) from the tide gauge
    """
    print('Delta = {}'.format(delta), '\n')

    # defining the folders for predictors
    dir_in = "/lustre/fs0/home/mtadesse/MERRAv2/data"
    surge_path = "/lustre/fs0/home/mtadesse/obs_surge"
    csv_path = "/lustre/fs0/home/mtadesse/merraLocalized"

    # tide gauge files (one csv per gauge)
    tg_list = os.listdir(surge_path)

    # year folders of the predictor data
    years = os.listdir(dir_in)

    #################################
    # looping through the year folders
    #################################
    for yr in years:
        print(yr, '\n')
        yr_dir = os.path.join(dir_in, yr)

        ####################################
        # looping through the daily .nc files
        ####################################
        for dd in os.listdir(yr_dir):
            # readnetcdf is handed a bare file name, so it has to run from
            # the folder that holds the file
            os.chdir(yr_dir)
            print(dd, '\n')

            #########################################
            # get netcdf components - predictor file
            #########################################
            nc_file = readnetcdf(dd)
            lon, lat, nc_time = nc_file[0], nc_file[1], nc_file[2]
            predictors = {
                'slp': nc_file[3],
                'wnd_u': nc_file[4],
                'wnd_v': nc_file[5],
            }

            # looping through individual tide gauges
            for t in range(startVal, endVal):
                tg = tg_list[t]
                print(tg, '\n')

                surge_file = os.path.join(surge_path, tg)
                if os.stat(surge_file).st_size == 0:
                    print('\n', "This tide gauge has no surge data!", '\n')
                    continue

                # the first row of the surge csv holds the gauge lon and lat
                surge = pd.read_csv(surge_file, header=None)
                tg_cord = Coordinate(surge.iloc[0, 0], surge.iloc[0, 1])

                # find closest grid points and their indices
                close_grids = findPixels(tg_cord, delta, lon, lat)
                ind_grids = findindx(close_grids, lon, lat)

                # names used for the output folders and files
                tg_name_old = tg.split('.mat.mat.csv')[0]
                tg_name = '-'.join([str(t), tg_name_old])
                nc_stamp = dd.split('.')[2]

                # subset every predictor on the selected grid points
                for pred_name, pred in predictors.items():
                    pred_new = subsetter(dd, pred, ind_grids, nc_time)

                    # <csv_path>/<tide gauge>/<predictor>, created on demand
                    out_dir = os.path.join(csv_path, tg_name, pred_name)
                    os.makedirs(out_dir, exist_ok=True)

                    # save as csv
                    save_name = '_'.join([tg_name, pred_name, nc_stamp]) \
                        + ".csv"
                    pred_new.to_csv(os.path.join(out_dir, save_name))

                    # concatenate and delete csv on the fly
                    _merge_into_combined(out_dir, pred_name)