Beispiel #1
0
def extract_data(delta=3):
    """
    Extract slp, wnd_u and wnd_v around each tide gauge, one csv per day.

    Walks every year folder under the predictor directory and every daily
    netCDF file inside it. Each file is read once with readnetcdf; then,
    for every non-empty tide gauge file in the surge directory, the gauge
    coordinate is built from the first row of that file, the nearby grid
    points are located with findPixels/findindx, and each predictor is
    subset with subsetter and written to
    <csv_path>/<gauge>/<predictor>/<gauge>_<predictor>_<date>.csv
    where <date> is the third dot-separated field of the netCDF file name.

    delta: distance (in degrees) from the tide gauge

    Returns nothing. The working directory of the process is changed
    repeatedly and is not restored afterwards.
    """

    print('Delta =  {}'.format(delta), '\n')

    #defining the folders for predictors
    dir_in = "G:\\04_merra\\merraNetCDF"
    #raw string: "\d" and "\o" are not valid escape sequences and
    #trigger a SyntaxWarning on recent Python versions
    surge_path = r"D:\data\obs_surge"
    csv_path = "G:\\04_merra\\merraNewLocalized"

    #cd to the obs_surge dir to get TG information
    os.chdir(surge_path)
    tg_list = os.listdir()

    #cd to the predictor dir to get the year folders
    os.chdir(dir_in)
    years = os.listdir()

    #################################
    #looping through the year folders
    #################################

    for yr in years:
        year_dir = os.path.join(dir_in, yr)
        print(yr, '\n')
        os.chdir(year_dir)

        ####################################
        #looping through the daily .nc files
        ####################################

        for dd in os.listdir():

            #the gauge loop below moves the cwd away, and readnetcdf is
            #handed a bare file name - so go back to the predictor folder
            os.chdir(year_dir)
            print(dd, '\n')

            #########################################
            #get netcdf components  - predictor file
            #########################################

            nc_file = readnetcdf(dd)
            lon, lat, time = nc_file[0], nc_file[1], nc_file[2]
            predSLP, predU10, predV10 = nc_file[3], nc_file[4], nc_file[5]

            #the same three fields are subset for every tide gauge
            predictors = {'slp': predSLP, 'wnd_u': predU10,
                          'wnd_v': predV10}

            #time for saving file
            ncTime = dd.split('.')[2]

            #looping through individual tide gauges
            for tg in tg_list:

                #extract lon and lat data from surge csv file
                print(tg, '\n')
                os.chdir(surge_path)

                if os.stat(tg).st_size == 0:
                    print('\n', "This tide gauge has no surge data!", '\n')
                    continue

                surge = pd.read_csv(tg, header=None)

                #define tide gauge coordinate(lon, lat)
                tg_cord = Coordinate(surge.iloc[0, 0], surge.iloc[0, 1])

                #find closest grid points and their indices
                close_grids = findPixels(tg_cord, delta, lon, lat)
                ind_grids = findindx(close_grids, lon, lat)

                #the name of the tide gauge - for saving purposes
                tg_name = tg.split('.mat.mat.csv')[0]

                #subset each predictor on the selected grid points
                for xx in predictors:

                    start_time = tt.time()

                    pred_new = subsetter(dd, predictors[xx], ind_grids, time)

                    print("--- %s seconds ---" % (tt.time() - start_time))

                    #create <tide gauge>/<predictor> under csv_path if
                    #needed and cd into it
                    os.chdir(csv_path)
                    out_dir = os.path.join(tg_name, xx)
                    os.makedirs(out_dir, exist_ok=True)
                    os.chdir(out_dir)

                    #save as csv
                    save_name = '_'.join([tg_name, xx, ncTime]) + ".csv"
                    pred_new.to_csv(save_name)
def extract_data(delta=3):
    """
    Extract slp, wnd_u and wnd_v for one tide gauge into three csv files.

    Walks every year folder under the predictor directory and every daily
    netCDF file inside it. Each file is read with readnetcdf and, for the
    tide gauge(s) in the hard-coded index range, the three predictors are
    subset with subsetter around the gauge. The daily subsets are stacked
    row-wise and, after each year folder, everything collected so far is
    written to slp.csv, wnd_u.csv and wnd_v.csv inside
    <csv_path>/<index>-<gauge>/ (overwriting the previous year's files).

    If nothing has been extracted yet (for instance because the gauge
    file is empty) the save step is skipped instead of failing on
    undefined variables.

    delta: distance (in degrees) from the tide gauge

    Returns nothing. The working directory of the process is changed
    repeatedly and is not restored afterwards.
    """

    print('Delta =  {}'.format(delta), '\n')

    #defining the folders for predictors
    dir_in = "/lustre/fs0/home/mtadesse/MERRAv2/data"
    surge_path = "/lustre/fs0/home/mtadesse/obs_surge"
    csv_path = "/lustre/fs0/home/mtadesse/merraLocalized"

    #cd to the obs_surge dir to get TG information
    #NOTE(review): gauges are picked by position in this listing and
    #os.listdir() order is arbitrary - confirm index 376 is the intended
    #gauge on the machine this runs on
    os.chdir(surge_path)
    tg_list = os.listdir()

    #cd to the predictor dir to get the year folders; sorted so that the
    #row order of the output does not depend on the file system
    os.chdir(dir_in)
    years = sorted(os.listdir())

    #index range of the tide gauges to process
    x = 376
    y = 377

    #subsets collected so far, per predictor. Collecting in lists and
    #concatenating once per year avoids re-copying the growing frame
    #for every daily file
    frames = {'slp': [], 'wnd_u': [], 'wnd_v': []}

    #last tide gauge visited - names the output directory
    tg = None
    t = None

    #################################
    #looping through the year folders
    #################################

    for yr in years:
        year_dir = os.path.join(dir_in, yr)
        os.chdir(year_dir)

        ####################################
        #looping through the daily .nc files
        ####################################

        for dd in sorted(os.listdir()):

            os.chdir(year_dir)  #back to the predictor folder
            print(dd, '\n')

            #########################################
            #get netcdf components  - predictor file
            #########################################

            nc_file = readnetcdf(dd)
            lon, lat, time = nc_file[0], nc_file[1], nc_file[2]
            predSLP, predU10, predV10 = nc_file[3], nc_file[4], nc_file[5]

            predictors = {'slp': predSLP, 'wnd_u': predU10,
                          'wnd_v': predV10}

            #looping through individual tide gauges
            for t in range(x, y):

                tg = tg_list[t]

                #extract lon and lat data from surge csv file
                os.chdir(surge_path)

                if os.stat(tg).st_size == 0:
                    print('\n', "This tide gauge has no surge data!", '\n')
                    continue

                surge = pd.read_csv(tg, header=None)

                #define tide gauge coordinate(lon, lat)
                tg_cord = Coordinate(surge.iloc[0, 0], surge.iloc[0, 1])

                #find closest grid points and their indices
                close_grids = findPixels(tg_cord, delta, lon, lat)
                ind_grids = findindx(close_grids, lon, lat)

                #subset each predictor on the selected grid points
                for xx in predictors:
                    frames[xx].append(
                        subsetter(dd, predictors[xx], ind_grids, time))

        #nothing extracted yet - there is nothing to write and tg/t may
        #not even be set
        if not frames['slp']:
            print('\n', "No predictor data extracted - nothing to save!",
                  '\n')
            continue

        #stack what has been collected and keep only the stacked frame,
        #so next year's concat copies the old rows just once
        for xx in frames:
            frames[xx] = [pd.concat(frames[xx], axis=0)]
        print(frames['slp'][0].shape)

        #create the tide gauge directory under csv_path and cd into it
        os.chdir(csv_path)
        tg_name_old = tg.split('.mat.mat.csv')[0]
        tg_name = '-'.join([str(t), tg_name_old])
        os.makedirs(tg_name, exist_ok=True)
        os.chdir(tg_name)

        #save as csv
        frames['slp'][0].to_csv('slp.csv')
        frames['wnd_u'][0].to_csv('wnd_u.csv')
        frames['wnd_v'][0].to_csv('wnd_v.csv')
def extract_data(delta=3):
    """
    Extract slp, wnd_u and wnd_v per tide gauge, merging csv files on the fly.

    Walks every year folder under the predictor directory and every daily
    netCDF file inside it. Each file is read with readnetcdf and, for the
    tide gauges whose list index lies in range(startVal, endVal), each
    predictor is subset with subsetter and written as a daily csv into
    <csv_path>/<index>-<gauge>/<predictor>/. Right after that, all csv
    files in that folder are merged into <predictor>.csv and the files
    whose name starts with the gauge index are deleted, so only the
    merged file remains.

    startVal and endVal are not defined in this function; they are
    expected to exist at module level.

    delta: distance (in degrees) from the tide gauge

    Returns nothing. The working directory of the process is changed
    repeatedly and is not restored afterwards.
    """

    print('Delta =  {}'.format(delta), '\n')

    #defining the folders for predictors
    dir_in = "/lustre/fs0/home/mtadesse/MERRAv2/data"
    surge_path = "/lustre/fs0/home/mtadesse/obs_surge"
    csv_path = "/lustre/fs0/home/mtadesse/merraLocalized"

    #cd to the obs_surge dir to get TG information
    #NOTE(review): gauges are picked by position in this listing and
    #os.listdir() order is arbitrary - confirm the index range maps to
    #the intended gauges on the machine this runs on
    os.chdir(surge_path)
    tg_list = os.listdir()

    #cd to the predictor dir to get the year folders; sorted so that the
    #row order of the merged csv does not depend on the file system
    os.chdir(dir_in)
    years = sorted(os.listdir())

    #index range of the tide gauges to process
    x = startVal
    y = endVal

    #################################
    #looping through the year folders
    #################################

    for yr in years:
        year_dir = os.path.join(dir_in, yr)
        print(yr, '\n')
        os.chdir(year_dir)

        ####################################
        #looping through the daily .nc files
        ####################################

        for dd in sorted(os.listdir()):

            os.chdir(year_dir)  #back to the predictor folder
            print(dd, '\n')

            #########################################
            #get netcdf components  - predictor file
            #########################################

            nc_file = readnetcdf(dd)
            lon, lat, time = nc_file[0], nc_file[1], nc_file[2]
            predSLP, predU10, predV10 = nc_file[3], nc_file[4], nc_file[5]

            predictors = {'slp': predSLP, 'wnd_u': predU10,
                          'wnd_v': predV10}

            #time for saving file
            ncTime = dd.split('.')[2]

            #looping through individual tide gauges
            for t in range(x, y):

                tg = tg_list[t]

                #extract lon and lat data from surge csv file
                print(tg, '\n')
                os.chdir(surge_path)

                if os.stat(tg).st_size == 0:
                    print('\n', "This tide gauge has no surge data!", '\n')
                    continue

                surge = pd.read_csv(tg, header=None)

                #define tide gauge coordinate(lon, lat)
                tg_cord = Coordinate(surge.iloc[0, 0], surge.iloc[0, 1])

                #find closest grid points and their indices
                close_grids = findPixels(tg_cord, delta, lon, lat)
                ind_grids = findindx(close_grids, lon, lat)

                #output folder name: <index>-<gauge name>
                tg_name = '-'.join([str(t), tg.split('.mat.mat.csv')[0]])

                #subset each predictor on the selected grid points
                for xx in predictors:

                    pred_new = subsetter(dd, predictors[xx], ind_grids, time)

                    #create <tide gauge>/<predictor> under csv_path if
                    #needed and cd into it
                    os.chdir(csv_path)
                    out_dir = os.path.join(tg_name, xx)
                    os.makedirs(out_dir, exist_ok=True)
                    os.chdir(out_dir)

                    #save the daily subset as csv
                    save_name = '_'.join([tg_name, xx, ncTime]) + ".csv"
                    pred_new.to_csv(save_name)

                    #concatenate and delete csv on the fly.
                    #glob returns names in arbitrary order, so put the
                    #already merged file first and the rest in name
                    #order - new rows always end up after the old ones
                    combined_name = '.'.join([xx, 'csv'])
                    all_filenames = sorted(
                        glob.glob('*.csv'),
                        key=lambda name: (name != combined_name, name))

                    #combine all files in the list
                    combined_csv = pd.concat(
                        [pd.read_csv(f) for f in all_filenames])
                    #export to csv
                    combined_csv.to_csv(combined_name,
                                        index=False,
                                        encoding='utf-8-sig')

                    #delete the individual file(s): their names start
                    #with the gauge index, the merged file's does not
                    for file in os.listdir():
                        if file.startswith(str(t)):
                            os.remove(file)