def interpolate_merra_hybrid(base_dir, EPSG, REGION, tdec, X, Y,
    VERSION='v1', VARIABLE='FAC', SIGMA=1.5, FILL_VALUE=None,
    EXTRAPOLATE=False):

    #-- set the input netCDF4 file for the variable of interest
    if VARIABLE in ('FAC','cum_smb_anomaly','height'):
        hybrid_file='gsfc_fdm_{0}_{1}.nc'.format(VERSION,REGION.lower())
    if (VARIABLE == 'FAC') and (VERSION == 'v0'):
        hybrid_file='gsfc_{0}_{1}.nc'.format('FAC',REGION.lower())
    elif VARIABLE in ('p_minus_e','melt') and (VERSION == 'v0'):
        hybrid_file='m2_hybrid_{0}_cumul_{1}.nc'.format(VARIABLE,REGION.lower())

    #-- Open the MERRA-2 Hybrid NetCDF file for reading
    fileID = netCDF4.Dataset(os.path.join(base_dir,hybrid_file), 'r')
    #-- Get data from each netCDF variable and remove singleton dimensions
    fd = {}
    fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
    xg = fileID.variables['x'][:,:].copy()
    yg = fileID.variables['y'][:,:].copy()
    fd['time'] = fileID.variables['time'][:].copy()
    #-- invalid data value
    fv = float(fileID.variables[VARIABLE]._FillValue)
    #-- input shape of MERRA-2 Hybrid firn data
    nt,nx,ny = np.shape(fd[VARIABLE])
    #-- close the NetCDF files
    fileID.close()
    #-- time is year decimal at time step 5 days
    time_step = 5.0/365.25

    #-- indices of specified ice mask
    i,j = np.nonzero(fd[VARIABLE][0,:,:] != fv)
    #-- create mask object for interpolating data
    fd['mask'] = np.zeros((nx,ny))
    fd['mask'][i,j] = 1.0
    #-- extract x and y coordinate arrays from grids
    fd['x'],fd['y'] = (xg[:,0],yg[0,:])

    #-- use a gaussian filter to smooth mask
    gs = {}
    gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'], SIGMA,
        mode='constant', cval=0)
    #-- indices of smoothed ice mask
    ii,jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
    #-- use a gaussian filter to smooth each firn field
    gs[VARIABLE] = np.ma.zeros((nt,nx,ny), fill_value=fv)
    gs[VARIABLE].mask = np.zeros((nt,nx,ny), dtype=bool)
    for t in range(nt):
        #-- replace fill values before smoothing data
        temp1 = np.zeros((nx,ny))
        #-- reference to first firn field
        temp1[i,j] = fd[VARIABLE][t,i,j] - fd[VARIABLE][0,i,j]
        #-- smooth firn field
        temp2 = scipy.ndimage.gaussian_filter(temp1, SIGMA,
            mode='constant', cval=0)
        #-- scale output smoothed firn field
        gs[VARIABLE].data[t,ii,jj] = temp2[ii,jj]/gs['mask'][ii,jj]
        #-- replace valid firn values with original
        gs[VARIABLE].data[t,i,j] = temp1[i,j]
        #-- set mask variables for time
        gs[VARIABLE].mask[t,:,:] = (gs['mask'] == 0.0)

    #-- convert projection from input coordinates (EPSG) to model coordinates
    #-- MERRA-2 Hybrid models are rotated pole latitude and longitude
    MODEL_EPSG = set_projection(REGION)
    proj1 = pyproj.Proj("+init={0}".format(EPSG))
    proj2 = pyproj.Proj("+init={0}".format(MODEL_EPSG))
    ix,iy = pyproj.transform(proj1, proj2, X, Y)

    #-- check that input points are within convex hull of smoothed model points
    points = np.concatenate((xg[ii,jj,None],yg[ii,jj,None]),axis=1)
    triangle = scipy.spatial.Delaunay(points.data, qhull_options='Qt Qbb Qc Qz')
    interp_points = np.concatenate((ix[:,None],iy[:,None]),axis=1)
    valid = (triangle.find_simplex(interp_points) >= 0)

    #-- output interpolated arrays of variable
    npts = len(tdec)
    interp_data = np.ma.zeros((npts),fill_value=fv)
    #-- interpolation mask of invalid values
    interp_data.mask = np.ones((npts),dtype=bool)
    #-- type designating algorithm used (1: interpolate, 2: backward, 3:forward)
    interp_data.interpolation = np.zeros_like(tdec,dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['time'].min()) & (tdec <= fd['time'].max()) & valid):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['time'].min()) &
            (tdec <= fd['time'].max()) & valid)
        #-- create an interpolator for firn height or air content
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'],fd['x'],fd['y']), gs[VARIABLE].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'],fd['x'],fd['y']), gs[VARIABLE].mask)
        #-- interpolate to points
        interp_data.data[ind] = RGI.__call__(np.c_[tdec[ind],ix[ind],iy[ind]])
        interp_data.mask[ind] = MI.__call__(np.c_[tdec[ind],ix[ind],iy[ind]])
        #-- set interpolation type (1: interpolated)
        interp_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero((tdec < fd['time'].min()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before firn model
        ind, = np.nonzero((tdec < fd['time'].min()) & valid)
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        N = int(10.0/time_step)
        #-- spatially interpolate variable to coordinates
        T = np.zeros((N))
        DATA = np.zeros((count,N))
        MASK = np.zeros((count,N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            T[k] = fd['time'][k]
            #-- spatially interpolate variable and mask
            f1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].data[k,:,:], kx=1, ky=1)
            f2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].mask[k,:,:], kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:,k] = f1.ev(ix[ind],iy[ind])
            MASK[:,k] = f2.ev(ix[ind],iy[ind])
        #-- calculate regression model
        for n,v in enumerate(ind):
            interp_data.data[v] = regress_model(T, DATA[n,:], tdec[v], ORDER=2,
                CYCLES=[0.25,0.5,1.0,2.0,4.0,5.0], RELATIVE=T[0])
        #-- mask any invalid points
        interp_data.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero((tdec > fd['time'].max()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after firn model
        ind, = np.nonzero((tdec > fd['time'].max()) & valid)
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        N = int(10.0/time_step)
        #-- spatially interpolate variable to coordinates
        T = np.zeros((N))
        DATA = np.zeros((count,N))
        MASK = np.zeros((count,N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            T[k] = fd['time'][kk]
            #-- spatially interpolate variable and mask
            f1 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].data[kk,:,:], kx=1, ky=1)
            f2 = scipy.interpolate.RectBivariateSpline(fd['x'], fd['y'],
                gs[VARIABLE].mask[kk,:,:], kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:,k] = f1.ev(ix[ind],iy[ind])
            MASK[:,k] = f2.ev(ix[ind],iy[ind])
        #-- calculate regression model
        for n,v in enumerate(ind):
            interp_data.data[v] = regress_model(T, DATA[n,:], tdec[v], ORDER=2,
                CYCLES=[0.25,0.5,1.0,2.0,4.0,5.0], RELATIVE=T[-1])
        #-- mask any invalid points
        interp_data.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero(interp_data.data == interp_data.fill_value)
    interp_data.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        interp_data.fill_value = FILL_VALUE
        interp_data.data[interp_data.mask] = interp_data.fill_value

    #-- return the interpolated values
    return interp_data
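
#-- Usage sketch (not part of the original module): the directory, the EPSG
#-- code and the point coordinates below are illustrative assumptions only
def _example_interpolate_merra_hybrid():
    #-- dates in decimal years and point coordinates (EPSG:3413 assumed)
    tdec = np.array([2015.0, 2015.5, 2016.0])
    X = np.full((3), -150000.0)
    Y = np.full((3), -2000000.0)
    #-- interpolate Greenland firn air content to the input points
    FAC = interpolate_merra_hybrid('/data/merra_hybrid', 'EPSG:3413', 'GL',
        tdec, X, Y, VERSION='v1', VARIABLE='FAC', FILL_VALUE=np.nan)
    return FAC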
def interpolate_racmo_firn(base_dir,
                           EPSG,
                           MODEL,
                           tdec,
                           X,
                           Y,
                           VARIABLE='zs',
                           SIGMA=1.5,
                           FILL_VALUE=None,
                           REFERENCE=False):

    #-- set parameters based on input model
    FIRN_FILE = {}
    if (MODEL == 'FGRN11'):
        #-- filename and directory for input FGRN11 file
        FIRN_FILE['zs'] = 'FDM_zs_FGRN11_1960-2016.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_FGRN11_1960-2016.nc'
        FIRN_DIRECTORY = ['RACMO', 'FGRN11_1960-2016']
        #-- time is year decimal from 1960-01-01 at time_step 10 days
        time_step = 10.0 / 365.25
        #-- rotation parameters
        rot_lat = -18.0
        rot_lon = -37.5
    elif (MODEL == 'FGRN055'):
        #-- filename and directory for input FGRN055 file
        FIRN_FILE['zs'] = 'FDM_zs_FGRN055_1960-2017_interpol.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_FGRN055_1960-2017_interpol.nc'
        FIRN_DIRECTORY = ['RACMO', 'FGRN055_1960-2017']
        #-- time is year decimal from 1960-01-01 at time_step 10 days
        time_step = 10.0 / 365.25
        #-- rotation parameters
        rot_lat = -18.0
        rot_lon = -37.5
    elif (MODEL == 'XANT27'):
        #-- filename and directory for input XANT27 file
        FIRN_FILE['zs'] = 'FDM_zs_ANT27_1979-2016.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_ANT27_1979-2016.nc'
        FIRN_DIRECTORY = ['RACMO', 'XANT27_1979-2016']
        #-- time is year decimal from 1979-01-01 at time_step 10 days
        time_step = 10.0 / 365.25
        #-- rotation parameters
        rot_lat = -180.0
        rot_lon = 10.0
    elif (MODEL == 'ASE055'):
        #-- filename and directory for input ASE055 file
        FIRN_FILE['zs'] = 'FDM_zs_ASE055_1979-2015.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_ASE055_1979-2015.nc'
        FIRN_DIRECTORY = ['RACMO', 'ASE055_1979-2015']
        #-- time is year decimal from 1979-01-01 at time_step 10 days
        time_step = 10.0 / 365.25
        #-- rotation parameters
        rot_lat = 167.0
        rot_lon = 53.0
    elif (MODEL == 'XPEN055'):
        #-- filename and directory for input XPEN055 file
        FIRN_FILE['zs'] = 'FDM_zs_XPEN055_1979-2016.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_XPEN055_1979-2016.nc'
        FIRN_DIRECTORY = ['RACMO', 'XPEN055_1979-2016']
        #-- time is year decimal from 1979-01-01 at time_step 10 days
        time_step = 10.0 / 365.25
        #-- rotation parameters
        rot_lat = -180.0
        rot_lon = 30.0

    #-- Open the RACMO NetCDF file for reading
    ddir = os.path.join(base_dir, *FIRN_DIRECTORY)
    fileID = netCDF4.Dataset(os.path.join(ddir, FIRN_FILE[VARIABLE]), 'r')
    fd = {}
    #-- invalid data value
    fv = float(fileID.variables[VARIABLE]._FillValue)
    #-- Get data from each netCDF variable and remove singleton dimensions
    fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
    #-- verify mask object for interpolating data
    fd[VARIABLE].mask |= (fd[VARIABLE].data == fv)
    fd['lon'] = fileID.variables['lon'][:, :].copy()
    fd['lat'] = fileID.variables['lat'][:, :].copy()
    fd['time'] = fileID.variables['time'][:].copy()
    #-- input shape of RACMO firn data
    nt, ny, nx = np.shape(fd[VARIABLE])
    #-- close the NetCDF files
    fileID.close()

    #-- indices of specified ice mask
    i, j = np.nonzero(fd[VARIABLE][0, :, :] != fv)
    #-- create mask object for interpolating data
    fd['mask'] = np.zeros((ny, nx))
    fd['mask'][i, j] = 1.0

    #-- use a gaussian filter to smooth mask
    gs = {}
    gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'],
                                               SIGMA,
                                               mode='constant',
                                               cval=0)
    #-- indices of smoothed ice mask
    ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
    #-- use a gaussian filter to smooth each firn field
    gs[VARIABLE] = np.ma.zeros((nt, ny, nx), fill_value=fv)
    gs[VARIABLE].mask = np.zeros((nt, ny, nx), dtype=bool)
    for t in range(nt):
        #-- replace fill values before smoothing data
        temp1 = np.zeros((ny, nx))
        #-- reference to first firn field
        if REFERENCE:
            temp1[i, j] = fd[VARIABLE][t, i, j] - fd[VARIABLE][0, i, j]
        else:
            temp1[i, j] = fd[VARIABLE][t, i, j].copy()
        #-- smooth firn field
        temp2 = scipy.ndimage.gaussian_filter(temp1,
                                              SIGMA,
                                              mode='constant',
                                              cval=0)
        #-- scale output smoothed firn field
        gs[VARIABLE][t, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
        #-- replace valid firn values with original
        gs[VARIABLE][t, i, j] = temp1[i, j]
        #-- set mask variables for time
        gs[VARIABLE].mask[t, :, :] = (gs['mask'] == 0.0)

    #-- rotated pole longitude and latitude of input model (model coordinates)
    xg, yg = rotate_coordinates(fd['lon'], fd['lat'], rot_lon, rot_lat)
    #-- recreate arrays to fix small floating point errors
    #-- (ensure that arrays are monotonically increasing)
    fd['x'] = np.linspace(np.mean(xg[:, 0]), np.mean(xg[:, -1]), nx)
    fd['y'] = np.linspace(np.mean(yg[0, :]), np.mean(yg[-1, :]), ny)

    #-- convert projection from input coordinates (EPSG) to model coordinates
    #-- RACMO models are rotated pole latitude and longitude
    proj1 = pyproj.Proj("+init={0}".format(EPSG))
    proj2 = pyproj.Proj("+init=EPSG:{0:d}".format(4326))
    ilon, ilat = pyproj.transform(proj1, proj2, X, Y)
    #-- calculate rotated pole coordinates of input coordinates
    ix, iy = rotate_coordinates(ilon, ilat, rot_lon, rot_lat)

    #-- check that input points are within convex hull of smoothed model points
    points = np.concatenate((xg[ii, jj, None], yg[ii, jj, None]), axis=1)
    triangle = scipy.spatial.Delaunay(points.data,
                                      qhull_options='Qt Qbb Qc Qz')
    interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
    valid = (triangle.find_simplex(interp_points) >= 0)

    #-- output interpolated arrays of firn variable (height or firn air content)
    npts = len(tdec)
    interp_data = np.ma.zeros((npts), fill_value=fv, dtype=float)
    interp_data.mask = np.ones((npts), dtype=bool)
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    interp_data.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['time'].min()) & (tdec <= fd['time'].max()) & valid):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['time'].min())
                          & (tdec <= fd['time'].max()) & valid)

        #-- create an interpolator for model variable
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['y'], fd['x']), gs[VARIABLE].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['y'], fd['x']), gs[VARIABLE].mask)

        #-- interpolate to points
        interp_data.data[ind] = RGI.__call__(np.c_[tdec[ind], iy[ind],
                                                   ix[ind]])
        interp_data.mask[ind] = MI.__call__(np.c_[tdec[ind], iy[ind], ix[ind]])
        #-- set interpolation type (1: interpolated)
        interp_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero((tdec < fd['time'].min()) & valid)
    if (count > 0):
        #-- indices of dates before firn model
        ind, = np.nonzero((tdec < fd['time'].min()) & valid)
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        N = 365
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        T = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            T[k] = fd['time'][k]
            #-- spatially interpolate firn elevation or air content
            S1 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs[VARIABLE].data[k, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs[VARIABLE].mask[k, :, :].T, kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            FIRN[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp_data.data[v] = regress_model(
                T,
                FIRN[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[0])
        #-- mask any invalid points
        interp_data.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero((tdec > fd['time'].max()) & valid)
    if (count > 0):
        #-- indices of dates after firn model
        ind, = np.nonzero((tdec > fd['time'].max()) & valid)
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        N = 365
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        T = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            T[k] = fd['time'][kk]
            #-- spatially interpolate firn elevation or air content
            S1 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs[VARIABLE].data[kk, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs[VARIABLE].mask[kk, :, :].T, kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            FIRN[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp_data.data[v] = regress_model(
                T,
                FIRN[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[-1])
        #-- mask any invalid points
        interp_data.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero(interp_data.data == interp_data.fill_value)
    interp_data.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        interp_data.fill_value = FILL_VALUE
        interp_data.data[interp_data.mask] = interp_data.fill_value

    #-- return the interpolated values
    return interp_data
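
#-- Usage sketch (not part of the original module): assumes base_dir holds
#-- RACMO/FGRN055_1960-2017 with the FDM_zs NetCDF file; points hypothetical
def _example_interpolate_racmo_firn():
    tdec = np.array([2000.0, 2005.0, 2010.0])
    X = np.full((3), -200000.0)
    Y = np.full((3), -2200000.0)
    #-- firn surface height referenced to the first model epoch
    zs = interpolate_racmo_firn('/data', 'EPSG:3413', 'FGRN055', tdec, X, Y,
        VARIABLE='zs', REFERENCE=True, FILL_VALUE=np.nan)
    return zs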
def extrapolate_racmo_downscaled(base_dir,
                                 EPSG,
                                 VERSION,
                                 tdec,
                                 X,
                                 Y,
                                 VARIABLE='SMB',
                                 SEARCH='BallTree',
                                 NN=10,
                                 POWER=2.0,
                                 FILL_VALUE=None):

    #-- Full Directory Setup
    DIRECTORY = 'SMB1km_v{0}'.format(VERSION)

    #-- netcdf variable names
    input_products = {}
    input_products['SMB'] = 'SMB_rec'
    input_products['PRECIP'] = 'precip'
    input_products['RUNOFF'] = 'runoff'
    input_products['SNOWMELT'] = 'snowmelt'
    input_products['REFREEZE'] = 'refreeze'
    #-- version 1 was in separate files for each year
    if (VERSION == '1.0'):
        RACMO_MODEL = ['XGRN11', '2.3']
        VARNAME = input_products[VARIABLE]
        SUBDIRECTORY = '{0}_v{1}'.format(VARNAME, VERSION)
        input_dir = os.path.join(base_dir, 'RACMO', DIRECTORY, SUBDIRECTORY)
    elif (VERSION == '2.0'):
        RACMO_MODEL = ['XGRN11', '2.3p2']
        var = input_products[VARIABLE]
        VARNAME = var if VARIABLE in ('SMB',
                                      'PRECIP') else '{0}corr'.format(var)
        input_dir = os.path.join(base_dir, 'RACMO', DIRECTORY)
    elif (VERSION == '3.0'):
        RACMO_MODEL = ['FGRN055', '2.3p2']
        var = input_products[VARIABLE]
        VARNAME = var if (VARIABLE == 'SMB') else '{0}corr'.format(var)
        input_dir = os.path.join(base_dir, 'RACMO', DIRECTORY)
    #-- input cumulative netCDF4 file
    args = (RACMO_MODEL[0], RACMO_MODEL[1], VERSION, VARIABLE)
    input_file = '{0}_RACMO{1}_DS1km_v{2}_{3}_cumul.nc'.format(*args)

    #-- Open the RACMO NetCDF file for reading
    fileID = netCDF4.Dataset(os.path.join(input_dir, input_file), 'r')
    #-- input shape of RACMO data
    nt, ny, nx = fileID[VARNAME].shape
    #-- Get data from each netCDF variable
    d = {}
    #-- cell origins on the bottom right
    dx = np.abs(fileID.variables['x'][1] - fileID.variables['x'][0])
    dy = np.abs(fileID.variables['y'][1] - fileID.variables['y'][0])
    #-- latitude and longitude arrays at center of each cell
    d['LON'] = fileID.variables['LON'][:, :].copy()
    d['LAT'] = fileID.variables['LAT'][:, :].copy()
    #-- extract time (decimal years)
    d['TIME'] = fileID.variables['TIME'][:].copy()
    #-- mask object for interpolating data
    d['MASK'] = np.array(fileID.variables['MASK'][:], dtype=bool)
    i, j = np.nonzero(d['MASK'])

    #-- convert RACMO latitude and longitude to input coordinates (EPSG)
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string("epsg:{0:d}".format(4326))
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    direction = pyproj.enums.TransformDirection.INVERSE
    #-- convert projection from model coordinates
    xg, yg = transformer.transform(d['LON'], d['LAT'], direction=direction)

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    xy1 = np.concatenate((xg[i, j, None], yg[i, j, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

    #-- output extrapolated arrays of variable
    npts = len(tdec)
    extrap_data = np.ma.zeros((npts), dtype=float)
    extrap_data.data[:] = extrap_data.fill_value
    extrap_data.mask = np.zeros((npts), dtype=bool)
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap_data.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be extrapolated
    if np.any((tdec >= d['TIME'].min()) & (tdec <= d['TIME'].max())):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= d['TIME'].min()) & (tdec < d['TIME'].max()))
        #-- reduce x, y and t coordinates
        xind, yind, tind = (X[ind], Y[ind], tdec[ind])
        #-- determine which subset of time to read from the netCDF4 file
        f = scipy.interpolate.interp1d(d['TIME'],
                                       np.arange(nt),
                                       kind='linear',
                                       fill_value=(0, nt - 1),
                                       bounds_error=False)
        date_indice = f(tind).astype(int)
        #-- for each unique RACMO date
        #-- linearly interpolate in time between two RACMO maps
        #-- then use inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            kk, = np.nonzero(date_indice == k)
            count = np.count_nonzero(date_indice == k)
            #-- query the search tree to find the NN closest points
            xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
            dist, indices = tree.query(xy2, k=NN, return_distance=True)
            #-- normalized weights if POWER > 0 (typically between 1 and 3)
            #-- in the inverse distance weighting
            power_inverse_distance = dist**(-POWER)
            s = np.sum(power_inverse_distance, axis=1)
            w = power_inverse_distance / np.broadcast_to(
                s[:, None], (count, NN))
            #-- RACMO variables for times before and after tdec
            var1 = fileID.variables[VARNAME][k, i, j].copy()
            var2 = fileID.variables[VARNAME][k + 1, i, j].copy()
            #-- linearly interpolate to date
            dt = (tind[kk] - d['TIME'][k]) / (d['TIME'][k + 1] - d['TIME'][k])
            #-- spatially extrapolate using inverse distance weighting
            extrap_data[kk] = (1.0-dt)*np.sum(w*var1[indices],axis=1) + \
                dt*np.sum(w*var2[indices], axis=1)
        #-- set interpolation type (1: interpolated in time)
        extrap_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero((tdec < d['TIME'].min()))
    if (count > 0):
        #-- indices of dates before RACMO
        ind, = np.nonzero(tdec < d['TIME'].min())
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, NN))
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        N = 120
        #-- spatially interpolate variables to coordinates
        VAR = np.zeros((count, N))
        T = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            T[k] = d['TIME'][k]
            #-- spatially extrapolate variables
            var1 = fileID.variables[VARNAME][k, i, j].copy()
            VAR[:, k] = np.sum(w * var1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(
                T,
                VAR[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[0])
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero(tdec >= d['TIME'].max())
    if (count > 0):
        #-- indices of dates after RACMO
        ind, = np.nonzero(tdec >= d['TIME'].max())
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, NN))
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        N = 120
        #-- spatially interpolate variables to coordinates
        VAR = np.zeros((count, N))
        T = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            T[k] = d['TIME'][kk]
            #-- spatially extrapolate variables
            var1 = fileID.variables[VARNAME][kk, i, j].copy()
            VAR[:, k] = np.sum(w * var1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(
                T,
                VAR[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[-1])
        #-- set interpolation type (3: extrapolated forward in time)
        extrap_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero(extrap_data.data == extrap_data.fill_value)
    extrap_data.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        extrap_data.data[extrap_data.mask] = FILL_VALUE
        extrap_data.fill_value = FILL_VALUE

    #-- close the NetCDF files
    fileID.close()

    #-- return the extrapolated values
    return extrap_data
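
#-- Stand-alone sketch of the inverse distance weighting step used above,
#-- assuming the module-level BallTree import from scikit-learn; a minimal
#-- illustration of the technique, not the function's exact code path
def _idw_sketch(x_model, y_model, values, x_query, y_query, NN=10, POWER=2.0):
    #-- build a search tree from the model points
    xy1 = np.concatenate((x_model[:, None], y_model[:, None]), axis=1)
    tree = BallTree(xy1)
    #-- find the NN nearest model points to each query point
    xy2 = np.concatenate((x_query[:, None], y_query[:, None]), axis=1)
    dist, indices = tree.query(xy2, k=NN, return_distance=True)
    #-- weights are inverse distances raised to POWER, normalized to sum to 1
    w = dist**(-POWER)
    w /= np.sum(w, axis=1)[:, None]
    #-- weighted mean of the NN model values at each query point
    return np.sum(w * values[indices], axis=1)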
def interpolate_merra_hybrid(base_dir,
                             EPSG,
                             REGION,
                             tdec,
                             X,
                             Y,
                             VERSION='v1',
                             VARIABLE='FAC',
                             SIGMA=1.5,
                             FILL_VALUE=None,
                             EXTRAPOLATE=False,
                             GZIP=False):

    #-- suffix if compressed
    suffix = '.gz' if GZIP else ''
    #-- set the input netCDF4 file for the variable of interest
    if VARIABLE in ('FAC', 'cum_smb_anomaly', 'SMB_a', 'height', 'h_a'):
        args = (VERSION, REGION.lower(), suffix)
        hybrid_file = 'gsfc_fdm_{0}_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('smb', 'SMB', 'Me', 'Ra', 'Ru', 'Sn-Ev'):
        args = (VERSION, REGION.lower(), suffix)
        hybrid_file = 'gsfc_fdm_smb_{0}_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('Me_a', 'Ra_a', 'Ru_a', 'Sn-Ev_a'):
        args = (VERSION, REGION.lower(), suffix)
        hybrid_file = 'gsfc_fdm_smb_cumul_{0}_{1}.nc{2}'.format(*args)
    elif (VARIABLE == 'FAC') and (VERSION == 'v0'):
        args = ('FAC', REGION.lower(), suffix)
        hybrid_file = 'gsfc_{0}_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('p_minus_e', 'melt') and (VERSION == 'v0'):
        args = (VARIABLE, REGION.lower(), suffix)
        hybrid_file = 'm2_hybrid_{0}_cumul_{1}.nc{2}'.format(*args)

    #-- Open the MERRA-2 Hybrid NetCDF file for reading
    if GZIP:
        #-- read as in-memory (diskless) netCDF4 dataset
        with gzip.open(os.path.join(base_dir, hybrid_file), 'r') as f:
            fileID = netCDF4.Dataset(uuid.uuid4().hex, memory=f.read())
    else:
        #-- read netCDF4 dataset
        fileID = netCDF4.Dataset(os.path.join(base_dir, hybrid_file), 'r')

    #-- Get data from each netCDF variable and remove singleton dimensions
    fd = {}
    #-- time is year decimal at time step 5 days
    time_step = 5.0 / 365.25
    #-- if extrapolating data: read the full dataset
    #-- if simply interpolating with fill values: reduce to a subset
    if EXTRAPOLATE:
        #-- read time variables
        fd['time'] = fileID.variables['time'][:].copy()
        #-- read full dataset and remove singleton dimensions
        fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
    else:
        #-- reduce grids to time period of input buffered by time steps
        tmin = np.min(tdec) - 2.0 * time_step
        tmax = np.max(tdec) + 2.0 * time_step
        #-- find indices to times
        nt, = fileID.variables['time'].shape
        f = scipy.interpolate.interp1d(fileID.variables['time'][:],
                                       np.arange(nt),
                                       kind='nearest',
                                       bounds_error=False,
                                       fill_value=(0, nt))
        imin, imax = f((tmin, tmax)).astype(int)
        #-- read reduced time variables
        fd['time'] = fileID.variables['time'][imin:imax + 1].copy()
        #-- read reduced dataset and remove singleton dimensions
        fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][imin:imax +
                                                             1, :, :])
    #-- invalid data value
    fv = float(fileID.variables[VARIABLE]._FillValue)
    #-- input shape of MERRA-2 Hybrid firn data
    nt, nx, ny = np.shape(fd[VARIABLE])
    #-- extract x and y coordinate arrays from grids if applicable
    #-- else create meshgrids of coordinate arrays
    if (np.ndim(fileID.variables['x'][:]) == 2):
        xg = fileID.variables['x'][:].copy()
        yg = fileID.variables['y'][:].copy()
        fd['x'], fd['y'] = (xg[:, 0], yg[0, :])
    else:
        fd['x'] = fileID.variables['x'][:].copy()
        fd['y'] = fileID.variables['y'][:].copy()
        xg, yg = np.meshgrid(fd['x'], fd['y'], indexing='ij')
    #-- close the NetCDF files
    fileID.close()

    #-- indices of specified ice mask
    i, j = np.nonzero(fd[VARIABLE][0, :, :] != fv)
    #-- create mask object for interpolating data
    fd['mask'] = np.zeros((nx, ny))
    fd['mask'][i, j] = 1.0

    #-- use a gaussian filter to smooth mask
    gs = {}
    gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'],
                                               SIGMA,
                                               mode='constant',
                                               cval=0)
    #-- indices of smoothed ice mask
    ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
    #-- use a gaussian filter to smooth each firn field
    gs[VARIABLE] = np.ma.zeros((nt, nx, ny), fill_value=fv)
    gs[VARIABLE].mask = np.zeros((nt, nx, ny), dtype=bool)
    for t in range(nt):
        #-- replace fill values before smoothing data
        temp1 = np.zeros((nx, ny))
        #-- reference to first firn field
        temp1[i, j] = fd[VARIABLE][t, i, j] - fd[VARIABLE][0, i, j]
        #-- smooth firn field
        temp2 = scipy.ndimage.gaussian_filter(temp1,
                                              SIGMA,
                                              mode='constant',
                                              cval=0)
        #-- scale output smoothed firn field
        gs[VARIABLE].data[t, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
        #-- replace valid firn values with original
        gs[VARIABLE].data[t, i, j] = temp1[i, j]
        #-- set mask variables for time
        gs[VARIABLE].mask[t, :, :] = (gs['mask'] == 0.0)

    #-- convert projection from input coordinates (EPSG) to model coordinates
    MODEL_EPSG = set_projection(REGION)
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string(MODEL_EPSG)
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    #-- calculate projected coordinates of input coordinates
    ix, iy = transformer.transform(X, Y)

    #-- check that input points are within convex hull of smoothed model points
    v, triangle = find_valid_triangulation(xg[ii, jj], yg[ii, jj])
    #-- check if there is a valid triangulation
    if v:
        #-- check where points are within the convex hull of the triangulation
        interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
        valid = (triangle.find_simplex(interp_points) >= 0)
    else:
        #-- Check ix and iy against the bounds of x and y
        valid = (ix >= fd['x'].min()) & (ix <= fd['x'].max()) & \
            (iy >= fd['y'].min()) & (iy <= fd['y'].max())

    #-- output interpolated arrays of variable
    npts = len(tdec)
    interp_data = np.ma.zeros((npts), fill_value=fv)
    #-- interpolation mask of invalid values
    interp_data.mask = np.ones((npts), dtype=bool)
    #-- type designating algorithm used (1: interpolate, 2: backward, 3:forward)
    interp_data.interpolation = np.zeros_like(tdec, dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['time'].min()) & (tdec <= fd['time'].max()) & valid):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['time'].min())
                          & (tdec <= fd['time'].max()) & valid)
        #-- create an interpolator for firn height or air content
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['x'], fd['y']), gs[VARIABLE].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['x'], fd['y']), gs[VARIABLE].mask)
        #-- interpolate to points
        interp_data.data[ind] = RGI.__call__(np.c_[tdec[ind], ix[ind],
                                                   iy[ind]])
        interp_data.mask[ind] = MI.__call__(np.c_[tdec[ind], ix[ind], iy[ind]])
        #-- set interpolation type (1: interpolated)
        interp_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero((tdec < fd['time'].min()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before firn model
        ind, = np.nonzero((tdec < fd['time'].min()) & valid)
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        N = int(10.0 / time_step)
        #-- spatially interpolate variable to coordinates
        T = np.zeros((N))
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            T[k] = fd['time'][k]
            #-- spatially interpolate variable and mask
            f1 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs[VARIABLE].data[k, :, :], kx=1, ky=1)
            f2 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs[VARIABLE].mask[k, :, :], kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = f1.ev(ix[ind], iy[ind])
            MASK[:, k] = f2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp_data.data[v] = regress_model(
                T,
                DATA[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[0])
        #-- mask any invalid points
        interp_data.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero((tdec > fd['time'].max()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after firn model
        ind, = np.nonzero((tdec > fd['time'].max()) & valid)
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        N = int(10.0 / time_step)
        #-- spatially interpolate variable to coordinates
        T = np.zeros((N))
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            T[k] = fd['time'][kk]
            #-- spatially interpolate variable and mask
            f1 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs[VARIABLE].data[kk, :, :], kx=1, ky=1)
            f2 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs[VARIABLE].mask[kk, :, :], kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = f1.ev(ix[ind], iy[ind])
            MASK[:, k] = f2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp_data.data[v] = regress_model(
                T,
                DATA[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[-1])
        #-- mask any invalid points
        interp_data.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero(interp_data.data == interp_data.fill_value)
    interp_data.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        interp_data.fill_value = FILL_VALUE
        interp_data.data[interp_data.mask] = interp_data.fill_value

    #-- return the interpolated values
    return interp_data
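
#-- Hedged sketch of the find_valid_triangulation helper called above: it is
#-- assumed to try Delaunay triangulations with a few qhull option sets and
#-- return a success flag with the triangulation (inferred from its usage
#-- here, not the helper's actual implementation)
def _find_valid_triangulation_sketch(x, y):
    points = np.concatenate((x[:, None], y[:, None]), axis=1)
    for opts in ('Qt Qbb Qc Qz', 'Qt Qc QbB Qz'):
        try:
            #-- attempt a Delaunay triangulation with these qhull options
            return (True, scipy.spatial.Delaunay(points, qhull_options=opts))
        except scipy.spatial.qhull.QhullError:
            #-- newer scipy exposes this as scipy.spatial.QhullError
            pass
    #-- no valid triangulation: caller falls back to bounds checking
    return (False, None)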
def extrapolate_mar_daily(DIRECTORY,
                          EPSG,
                          VERSION,
                          tdec,
                          X,
                          Y,
                          XNAME=None,
                          YNAME=None,
                          TIMENAME='TIME',
                          VARIABLE='SMB',
                          SIGMA=1.5,
                          SEARCH='BallTree',
                          NN=10,
                          POWER=2.0,
                          FILL_VALUE=None,
                          EXTRAPOLATE=False):

    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(int)
    EY = np.nanmax(np.floor(tdec)).astype(int)
    YRS = '|'.join(['{0:4d}'.format(Y) for Y in range(SY, EY + 1)])
    #-- regular expression pattern for MAR dataset
    rx = re.compile(r'{0}-(.*?)-({1})(_subset)?\.nc$'.format(VERSION, YRS))

    #-- create list of files to read
    input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])

    #-- calculate number of time steps to read
    nt = 0
    for f, FILE in enumerate(input_files):
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables[XNAME][:])
            ny = len(fileID.variables[YNAME][:])
            TIME = fileID.variables[TIMENAME][:]
            try:
                nt += np.count_nonzero(TIME.data != TIME.fill_value)
            except AttributeError:
                nt += len(TIME)

    #-- python dictionary with file variables
    fd = {}
    fd['TIME'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['CUMULATIVE'] = np.ma.zeros((nt, ny, nx), fill_value=FILL_VALUE)
    gs['CUMULATIVE'].mask = np.ones((nt, ny, nx), dtype=bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for f, FILE in enumerate(input_files):
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            TIME = fileID.variables['TIME'][:]
            try:
                t = np.count_nonzero(TIME.data != TIME.fill_value)
            except AttributeError:
                t = len(TIME)
            #-- create a masked array with all data
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
            fd[VARIABLE].mask = np.zeros((t, ny, nx), dtype=bool)
            #-- surface type
            SRF = fileID.variables['SRF'][:]
            #-- indices of specified ice mask
            i, j = np.nonzero(SRF == 4)
            #-- ice fraction
            FRA = fileID.variables['FRA'][:] / 100.0
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            #-- combine sectors for multi-layered data
            if (np.ndim(tmp) == 4):
                #-- create mask for combining data
                MASK = np.zeros((t, ny, nx))
                MASK[:, i, j] = FRA[:t, 0, i, j]
                #-- combine data
                fd[VARIABLE][:] = MASK * tmp[:t, 0, :, :] + (
                    1.0 - MASK) * tmp[:t, 1, :, :]
            else:
                #-- copy data
                fd[VARIABLE][:] = tmp[:t, :, :].copy()
            #-- verify mask object for interpolating data
            surf_mask = np.broadcast_to(SRF, (t, ny, nx))
            fd[VARIABLE].mask[:, :, :] |= (surf_mask != 4)
            #-- combine mask object through time to create a single mask
            fd[VARIABLE].mask |= (fd[VARIABLE].data == fd[VARIABLE].fill_value)
            fd['MASK'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(float)
            #-- MAR coordinates
            fd['LON'] = fileID.variables['LON'][:, :].copy()
            fd['LAT'] = fileID.variables['LAT'][:, :].copy()
            #-- convert x and y coordinates to meters
            fd['x'] = 1000.0 * fileID.variables[XNAME][:].copy()
            fd['y'] = 1000.0 * fileID.variables[YNAME][:].copy()
            #-- extract delta time and epoch of time
            delta_time = fileID.variables[TIMENAME][:t].astype(float)
            units = fileID.variables[TIMENAME].units
        #-- convert epoch of time to Julian days
        Y1, M1, D1, h1, m1, s1 = [
            float(d) for d in re.findall(r'\d+\.\d+|\d+', units)
        ]
        epoch_julian = calc_julian_day(Y1,
                                       M1,
                                       D1,
                                       HOUR=h1,
                                       MINUTE=m1,
                                       SECOND=s1)
        #-- calculate time array in Julian days
        Y2, M2, D2, h2, m2, s2 = convert_julian(epoch_julian + delta_time)
        #-- calculate time in year-decimal
        fd['TIME'][c:c + t] = convert_calendar_decimal(Y2,
                                                       M2,
                                                       D2,
                                                       HOUR=h2,
                                                       MINUTE=m2,
                                                       SECOND=s2)
        #-- use a gaussian filter to smooth mask
        gs['MASK'] = scipy.ndimage.gaussian_filter(fd['MASK'],
                                                   SIGMA,
                                                   mode='constant',
                                                   cval=0)
        #-- indices of smoothed ice mask
        ii, jj = np.nonzero(np.ceil(gs['MASK']) == 1.0)
        #-- use a gaussian filter to smooth each model field
        gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
        gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
        #-- for each time
        for tt in range(t):
            #-- replace fill values before smoothing data
            temp1 = np.zeros((ny, nx))
            i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
            temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
            #-- smooth spatial field
            temp2 = scipy.ndimage.gaussian_filter(temp1,
                                                  SIGMA,
                                                  mode='constant',
                                                  cval=0)
            #-- scale output smoothed field
            gs[VARIABLE].data[tt, ii, jj] = temp2[ii, jj] / gs['MASK'][ii, jj]
            #-- replace valid values with original
            gs[VARIABLE].data[tt, i, j] = temp1[i, j]
            #-- set mask variables for time
            gs[VARIABLE].mask[tt, ii, jj] = False
            #-- calculate cumulative
            cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
            gs['CUMULATIVE'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
            gs['CUMULATIVE'].mask[c + tt, ii, jj] = False
        #-- add to counter
        c += t

    #-- convert MAR latitude and longitude to input coordinates (EPSG)
    proj1 = pyproj.Proj("+init={0}".format(EPSG))
    proj2 = pyproj.Proj("+init=EPSG:{0:d}".format(4326))
    xg, yg = pyproj.transform(proj2, proj1, fd['LON'], fd['LAT'])

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    xy1 = np.concatenate((xg[i, j, None], yg[i, j, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

    #-- output interpolated arrays of output variable
    npts = len(tdec)
    extrap = np.ma.zeros((npts), fill_value=FILL_VALUE, dtype=float)
    extrap.mask = np.ones((npts), dtype=bool)
    #-- initially set all values to fill value
    extrap.data[:] = extrap.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['TIME'].min()) & (tdec < fd['TIME'].max())):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['TIME'].min())
                          & (tdec < fd['TIME'].max()))
        #-- reduce x, y and t coordinates
        xind, yind, tind = (X[ind], Y[ind], tdec[ind])
        #-- find indices for linearly interpolating in time
        f = scipy.interpolate.interp1d(fd['TIME'],
                                       np.arange(nt),
                                       kind='linear')
        date_indice = f(tind).astype(int)
        #-- for each unique model date
        #-- linearly interpolate in time between two model maps
        #-- then use inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            kk, = np.nonzero(date_indice == k)
            count = np.count_nonzero(date_indice == k)
            #-- query the search tree to find the NN closest points
            xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
            dist, indices = tree.query(xy2, k=NN, return_distance=True)
            #-- normalized weights if POWER > 0 (typically between 1 and 3)
            #-- in the inverse distance weighting
            power_inverse_distance = dist**(-POWER)
            s = np.sum(power_inverse_distance, axis=1)
            w = power_inverse_distance / np.broadcast_to(
                s[:, None], (count, NN))
            #-- variable for times before and after tdec
            var1 = gs['CUMULATIVE'][k, i, j]
            var2 = gs['CUMULATIVE'][k + 1, i, j]
            #-- linearly interpolate to date
            dt = (tind[kk] - fd['TIME'][k]) / (fd['TIME'][k + 1] -
                                               fd['TIME'][k])
            #-- spatially extrapolate using inverse distance weighting
            extrap.data[kk] = (1.0-dt)*np.sum(w*var1[indices],axis=1) + \
                dt*np.sum(w*var2[indices], axis=1)
        #-- set interpolation type (1: interpolated in time)
        extrap.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero(tdec < fd['TIME'].min())
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero(tdec < fd['TIME'].min())
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, NN))
        #-- read the first year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate variable to coordinates
        DATA = np.zeros((count, N))
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['TIME'][k]
            #-- spatially extrapolate variable
            tmp = gs['CUMULATIVE'][k, i, j]
            DATA[:, k] = np.sum(w * tmp[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[0])
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero(tdec >= fd['TIME'].max())
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after model
        ind, = np.nonzero(tdec >= fd['TIME'].max())
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, NN))
        #-- read the last year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate variable to coordinates
        DATA = np.zeros((count, N))
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['TIME'][kk]
            #-- spatially extrapolate variable
            tmp = gs['CUMULATIVE'][kk, i, j]
            DATA[:, k] = np.sum(w * tmp[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[-1])
        #-- set interpolation type (3: extrapolated forward in time)
        extrap.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((extrap.data == extrap.fill_value)
                          | np.isnan(extrap.data))
    extrap.mask[invalid] = True

    #-- return the interpolated values
    return extrap
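
#-- Hedged sketch of the regress_model helper used by the extrapolation
#-- blocks above: a least-squares fit of a degree-ORDER polynomial plus
#-- sine/cosine terms with periods CYCLES (in years), relative to a
#-- reference epoch (behavior inferred from the calls above, not the
#-- helper's exact implementation)
def _regress_model_sketch(t_in, d_in, t_out, ORDER=2,
    CYCLES=[0.25, 0.5, 1.0], RELATIVE=0.0):
    #-- time relative to the reference epoch
    u_in = t_in - RELATIVE
    u_out = np.atleast_1d(t_out) - RELATIVE
    #-- design matrix columns: polynomial terms then periodic terms
    cols_in = [u_in**n for n in range(ORDER + 1)]
    cols_out = [u_out**n for n in range(ORDER + 1)]
    for c in CYCLES:
        cols_in.extend([np.sin(2.0*np.pi*u_in/c), np.cos(2.0*np.pi*u_in/c)])
        cols_out.extend([np.sin(2.0*np.pi*u_out/c), np.cos(2.0*np.pi*u_out/c)])
    #-- least-squares coefficients and evaluation at the output times
    beta = np.linalg.lstsq(np.transpose(cols_in), d_in, rcond=-1)[0]
    return np.dot(np.transpose(cols_out), beta)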
def interpolate_racmo_daily(base_dir,
                            EPSG,
                            MODEL,
                            tdec,
                            X,
                            Y,
                            VARIABLE='smb',
                            SIGMA=1.5,
                            FILL_VALUE=None,
                            EXTRAPOLATE=False):

    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(int)
    EY = np.nanmax(np.floor(tdec)).astype(int)
    YRS = '|'.join(['{0:4d}'.format(YEAR) for YEAR in range(SY, EY + 1)])
    #-- input list of files
    if (MODEL == 'FGRN055'):
        #-- filename and directory for input FGRN055 files
        file_pattern = r'RACMO2.3p2_FGRN055_{0}_daily_({1})\.nc$'
        DIRECTORY = os.path.join(base_dir, 'RACMO', 'GL', 'RACMO2.3p2_FGRN055')

    #-- create list of files to read
    rx = re.compile(file_pattern.format(VARIABLE, YRS), re.VERBOSE)
    input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])

    #-- calculate number of time steps to read
    nt = 0
    for f, FILE in enumerate(input_files):
        #-- Open the RACMO NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables['rlon'][:])
            ny = len(fileID.variables['rlat'][:])
            nt += len(fileID.variables['time'][:])
            #-- invalid data value
            fv = float(fileID.variables[VARIABLE]._FillValue)

    #-- scaling factor for converting units
    if (VARIABLE == 'hgtsrf'):
        scale_factor = 86400.0
    elif (VARIABLE == 'smb'):
        scale_factor = 1.0

    #-- python dictionary with file variables
    fd = {}
    fd['time'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['cumulative'] = np.ma.zeros((nt, ny, nx), fill_value=fv)
    gs['cumulative'].mask = np.zeros((nt, ny, nx), dtype=bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for f, FILE in enumerate(input_files):
        #-- Open the RACMO NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            t = len(fileID.variables['time'][:])
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=fv)
            fd[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            fd[VARIABLE][:] = scale_factor * tmp
            #-- indices of specified ice mask
            i, j = np.nonzero(tmp[0, :, :] != fv)
            fd[VARIABLE].mask[:, i, j] = False
            #-- combine mask object through time to create a single mask
            fd['mask'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(
                np.float64)
            #-- racmo coordinates
            fd['lon'] = fileID.variables['lon'][:, :].copy()
            fd['lat'] = fileID.variables['lat'][:, :].copy()
            fd['x'] = fileID.variables['rlon'][:].copy()
            fd['y'] = fileID.variables['rlat'][:].copy()
            #-- rotated pole parameters
            proj4_params = fileID.variables['rotated_pole'].proj4_params
            #-- extract delta time and epoch of time
            delta_time = fileID.variables['time'][:].astype(np.float64)
            units = fileID.variables['time'].units
        #-- convert epoch of time to Julian days
        Y1, M1, D1, h1, m1, s1 = [
            float(d) for d in re.findall(r'\d+\.\d+|\d+', units)
        ]
        epoch_julian = calc_julian_day(Y1,
                                       M1,
                                       D1,
                                       HOUR=h1,
                                       MINUTE=m1,
                                       SECOND=s1)
        #-- calculate time array in Julian days
        Y2, M2, D2, h2, m2, s2 = convert_julian(epoch_julian + delta_time)
        #-- calculate time in year-decimal
        fd['time'][c:c + t] = convert_calendar_decimal(Y2,
                                                       M2,
                                                       D2,
                                                       HOUR=h2,
                                                       MINUTE=m2,
                                                       SECOND=s2)
        #-- use a gaussian filter to smooth mask
        gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'],
                                                   SIGMA,
                                                   mode='constant',
                                                   cval=0)
        #-- indices of smoothed ice mask
        ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
        #-- use a gaussian filter to smooth each model field
        gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=fv)
        gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
        #-- for each time
        for tt in range(t):
            #-- replace fill values before smoothing data
            temp1 = np.zeros((ny, nx))
            i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
            temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
            #-- smooth spatial field
            temp2 = scipy.ndimage.gaussian_filter(temp1,
                                                  SIGMA,
                                                  mode='constant',
                                                  cval=0)
            #-- scale output smoothed field
            gs[VARIABLE][tt, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
            #-- replace valid values with original
            gs[VARIABLE][tt, i, j] = temp1[i, j]
            #-- set mask variables for time
            gs[VARIABLE].mask[tt, ii, jj] = False
            #-- calculate cumulative
            cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
            gs['cumulative'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
            gs['cumulative'].mask[c + tt, ii, jj] = False
        #-- add to counter
        c += t

    #-- convert projection from input coordinates (EPSG) to model coordinates
    #-- RACMO models are rotated pole latitude and longitude
    transformer = pyproj.Transformer.from_crs(pyproj.CRS.from_string(EPSG),
        pyproj.CRS.from_string(proj4_params), always_xy=True)
    #-- calculate rotated pole coordinates of input coordinates
    ix, iy = transformer.transform(X, Y)

    #-- check that input points are within convex hull of valid model points
    gs['x'], gs['y'] = np.meshgrid(fd['x'], fd['y'])
    v, triangle = find_valid_triangulation(gs['x'][ii, jj], gs['y'][ii, jj])
    #-- check where points are within the convex hull of the triangulation
    if v:
        interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
        valid = (triangle.find_simplex(interp_points) >= 0)
    else:
        #-- Check ix and iy against the bounds of x and y
        valid = (ix >= fd['x'].min()) & (ix <= fd['x'].max()) & \
            (iy >= fd['y'].min()) & (iy <= fd['y'].max())

    #-- output interpolated arrays of model variable
    npts = len(tdec)
    interp = np.ma.zeros((npts), fill_value=fv, dtype=np.float64)
    interp.mask = np.ones((npts), dtype=bool)
    #-- initially set all values to fill value
    interp.data[:] = interp.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    interp.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['time'].min()) & (tdec <= fd['time'].max()) & valid):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['time'].min())
                          & (tdec <= fd['time'].max()) & valid)
        #-- create an interpolator for model variable
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['y'], fd['x']), gs['cumulative'].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['y'], fd['x']), gs['cumulative'].mask)

        #-- interpolate to points
        interp.data[ind] = RGI.__call__(np.c_[tdec[ind], iy[ind], ix[ind]])
        interp.mask[ind] = MI.__call__(np.c_[tdec[ind], iy[ind], ix[ind]])
        #-- set interpolation type (1: interpolated)
        interp.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero((tdec < fd['time'].min()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero((tdec < fd['time'].min()) & valid)
        #-- read the first year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['time'][k]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs['cumulative'].data[k, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs['cumulative'].mask[k, :, :].T, kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[0])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero((tdec > fd['time'].max()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after model
        ind, = np.nonzero((tdec > fd['time'].max()) & valid)
        #-- read the last year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['time'][kk]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(
                fd['x'],
                fd['y'],
                gs['cumulative'].data[kk, :, :].T,
                kx=1,
                ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(
                fd['x'],
                fd['y'],
                gs['cumulative'].mask[kk, :, :].T,
                kx=1,
                ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[-1])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((interp.data == interp.fill_value)
                          | np.isnan(interp.data))
    interp.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        interp.fill_value = FILL_VALUE
        interp.data[interp.mask] = interp.fill_value

    #-- return the interpolated values
    return interp
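
#-- minimal usage sketch (not part of the original source): the directory,
#-- projection string and query points below are placeholder assumptions
# tdec = np.array([2015.25, 2015.75])      #-- year-decimal epochs
# X = np.array([-49875.0, -152125.0])      #-- x coordinates in EPSG:3413 (m)
# Y = np.array([-2098375.0, -2247875.0])   #-- y coordinates in EPSG:3413 (m)
# smb = interpolate_racmo_daily(os.path.expanduser('~/Data'), 'EPSG:3413',
#     'FGRN055', tdec, X, Y, VARIABLE='smb', SIGMA=1.5, EXTRAPOLATE=True)
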
def racmo_integrate_firn_height(base_dir, MODEL, VARIABLE='zs', OUTPUT=True):

    #-- set parameters based on input model
    FIRN_FILE = {}
    if (MODEL == 'FGRN11'):
        #-- filename and directory for input FGRN11 file
        FIRN_FILE['zs'] = 'FDM_zs_FGRN11_1960-2016.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_FGRN11_1960-2016.nc'
        FIRN_DIRECTORY = ['RACMO', 'FGRN11_1960-2016']
        FIRN_OUTPUT = 'FDM_{0}_FGRN11_1960-2016_Promice.txt'
        #-- time is year decimal from 1960-01-01 at time_step 10 days
        time_step = 10.0 / 365.25
        #-- rotation parameters
        rot_lat = -18.0
        rot_lon = -37.5
    elif (MODEL == 'FGRN055'):
        #-- filename and directory for input FGRN055 file
        FIRN_FILE['zs'] = 'FDM_zs_FGRN055_1960-2017_interpol.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_FGRN055_1960-2017_interpol.nc'
        FIRN_FILE['Mask'] = 'FGRN055_Masks_5.5km.nc'
        FIRN_DIRECTORY = ['RACMO', 'FGRN055_1960-2017']
        FIRN_OUTPUT = 'FDM_{0}_FGRN055_1960-2017_Promice.txt'
        #-- time is year decimal from 1960-01-01 at time_step 10 days
        time_step = 10.0 / 365.25
        #-- rotation parameters
        rot_lat = -18.0
        rot_lon = -37.5

    #-- Open the RACMO NetCDF file for reading
    ddir = os.path.join(base_dir, *FIRN_DIRECTORY)
    fileID = netCDF4.Dataset(os.path.join(ddir, FIRN_FILE[VARIABLE]), 'r')
    #-- Get data from each netCDF variable and remove singleton dimensions
    fd = {}
    fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
    fd['lon'] = fileID.variables['lon'][:, :].copy()
    fd['lat'] = fileID.variables['lat'][:, :].copy()
    fd['time'] = fileID.variables['time'][:].copy()
    #-- invalid data value
    fv = float(fileID.variables[VARIABLE]._FillValue)
    #-- input shape of RACMO firn data
    nt, ny, nx = np.shape(fd[VARIABLE])
    #-- close the NetCDF files
    fileID.close()

    #-- Open the RACMO Mask NetCDF file for reading
    fileID = netCDF4.Dataset(os.path.join(ddir, FIRN_FILE['Mask']), 'r')
    #-- Get data from each netCDF mask variable and remove singleton dimensions
    mask = {}
    for var in [
            'Area', 'Icemask_GR', 'Promicemask', 'Topography', 'lon', 'lat'
    ]:
        mask[var] = np.squeeze(fileID.variables[var][:].copy())
    my, mx = np.shape(mask['Area'])
    #-- close the NetCDF files
    fileID.close()

    #-- rotated pole longitude and latitude of input model (model coordinates)
    xg, yg = rotate_coordinates(fd['lon'], fd['lat'], rot_lon, rot_lat)
    xmask, ymask = rotate_coordinates(mask['lon'], mask['lat'], rot_lon,
                                      rot_lat)
    #-- recreate arrays to fix small floating point errors
    #-- (ensure that arrays are monotonically increasing)
    mask['x'] = np.linspace(np.mean(xmask[:, 0]), np.mean(xmask[:, -1]), mx)
    mask['y'] = np.linspace(np.mean(ymask[0, :]), np.mean(ymask[-1, :]), my)

    #-- create an interpolator for input masks
    #-- masks are on the original RACMO grid and not the firn model grid
    IMI = scipy.interpolate.RegularGridInterpolator((mask['y'], mask['x']),
                                                    mask['Icemask_GR'])
    PMI = scipy.interpolate.RegularGridInterpolator((mask['y'], mask['x']),
                                                    mask['Promicemask'])
    AMI = scipy.interpolate.RegularGridInterpolator((mask['y'], mask['x']),
                                                    mask['Area'])
    #-- interpolate masks to firn model coordinates
    Icemask_GR = IMI.__call__(np.c_[yg.flatten(), xg.flatten()])
    Promicemask = PMI.__call__(np.c_[yg.flatten(), xg.flatten()])
    #-- reshape, round to fix interpolation errors and convert to integers
    fd['Icemask_GR'] = np.round(Icemask_GR.reshape(ny, nx)).astype('i')
    fd['Promicemask'] = np.round(Promicemask.reshape(ny, nx)).astype('i')
    #-- interpolate area to firn model coordinates
    fd['Area'] = AMI.__call__(np.c_[yg.flatten(),
                                    xg.flatten()]).reshape(ny, nx)
    #-- clear memory of flattened interpolation masks
    Icemask_GR = None
    Promicemask = None

    #-- output integrated arrays of firn variable (height or firn air content)
    #-- for each land classification mask in km^3
    firn_volume = np.full((nt, 3), fv, dtype=np.float64)
    #-- extrapolate out two years in time
    tdec = np.arange(fd['time'][-1] + time_step, fd['time'][-1] + 2, time_step)
    ntx = len(tdec)
    firn_extrap = np.full((ntx, 3), fv, dtype=np.float64)
    for m in range(3):
        #-- indices of specified mask (0==ocean, 1==ice caps outside Greenland)
        #-- masks of interest: Greenland ice sheet and peripheral glaciers (2-4)
        i, j = np.nonzero((fd[VARIABLE][0, :, :] != fv)
                          & (fd['Icemask_GR'] == 1)
                          & (fd['Promicemask'] == (m + 2)))
        #-- for each time
        for t in range(nt):
            #-- integrate firn change over the mask area to volume in km^3
            firn_volume[t, m] = np.sum(fd[VARIABLE][t, i, j] *
                                       fd['Area'][i, j] / 1e3)
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        N = 365
        T = np.zeros((N))
        FIRN = np.zeros((N))
        #-- reduce time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            T[k] = fd['time'][kk]
            FIRN[k] = firn_volume[kk, m]
        #-- calculate regression model
        firn_extrap[:, m] = regress_model(T, FIRN, tdec, ORDER=2,
            CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0], RELATIVE=T[-1])

    #-- combine into single arrays
    combined_time = np.concatenate((fd['time'], tdec), axis=0)
    combined_firn = np.concatenate((firn_volume, firn_extrap), axis=0)

    #-- print to file
    if OUTPUT:
        #-- open the file
        fid = open(os.path.join(ddir, FIRN_OUTPUT.format(VARIABLE)), 'w')
        #-- print for each time
        for i, t in enumerate(combined_time):
            args = (t, *combined_firn[i, :])
            print('{0:0.4f}{1:12.4f}{2:12.4f}{3:12.4f}'.format(*args),
                  file=fid)
        #-- close the file
        fid.close()

    #-- return the combined integrated values
    return (combined_firn, combined_time)
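
#-- minimal usage sketch (not part of the original source; the base directory
#-- is a placeholder and the FGRN055 firn files are assumed to be present)
# volume, epochs = racmo_integrate_firn_height(os.path.expanduser('~/Data'),
#     'FGRN055', VARIABLE='zs', OUTPUT=False)
# #-- volume has one column per PROMICE mask class (2, 3 and 4)
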
def extrapolate_merra_hybrid(base_dir,
                             EPSG,
                             REGION,
                             tdec,
                             X,
                             Y,
                             VERSION='v1',
                             VARIABLE='FAC',
                             SEARCH='BallTree',
                             N=10,
                             POWER=2.0,
                             SIGMA=1.5,
                             FILL_VALUE=None,
                             EXTRAPOLATE=False):

    #-- set the input netCDF4 file for the variable of interest
    if VARIABLE in ('FAC', 'cum_smb_anomaly', 'height'):
        hybrid_file = 'gsfc_fdm_{0}_{1}.nc'.format(VERSION, REGION.lower())
    if VARIABLE in ('FAC',) and (VERSION == 'v0'):
        hybrid_file = 'gsfc_{0}_{1}.nc'.format('FAC', REGION.lower())
    elif VARIABLE in ('p_minus_e', 'melt') and (VERSION == 'v0'):
        hybrid_file = 'm2_hybrid_{0}_cumul_{1}.nc'.format(
            VARIABLE, REGION.lower())

    #-- Open the MERRA-2 Hybrid NetCDF file for reading
    fileID = netCDF4.Dataset(os.path.join(base_dir, hybrid_file), 'r')
    #-- Get data from each netCDF variable and remove singleton dimensions
    fd = {}
    fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
    xg = fileID.variables['x'][:, :].copy()
    yg = fileID.variables['y'][:, :].copy()
    fd['time'] = fileID.variables['time'][:].copy()
    #-- invalid data value
    fv = float(fileID.variables[VARIABLE]._FillValue)
    #-- input shape of MERRA-2 Hybrid firn data
    nt, nx, ny = np.shape(fd[VARIABLE])
    #-- close the NetCDF files
    fileID.close()
    #-- time is year decimal at time step 5 days
    time_step = 5.0 / 365.25

    #-- indices of specified ice mask
    i, j = np.nonzero(fd[VARIABLE][0, :, :] != fv)
    #-- create mask object for interpolating data
    fd['mask'] = np.zeros((nx, ny))
    fd['mask'][i, j] = 1.0
    #-- extract x and y coordinate arrays from grids
    fd['x'], fd['y'] = (xg[:, 0], yg[0, :])

    #-- use a gaussian filter to smooth mask
    gs = {}
    gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'],
                                               SIGMA,
                                               mode='constant',
                                               cval=0)
    #-- indices of smoothed ice mask
    ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
    #-- use a gaussian filter to smooth each firn field
    gs[VARIABLE] = np.ma.zeros((nt, nx, ny), fill_value=fv)
    gs[VARIABLE].mask = np.zeros((nt, nx, ny), dtype=bool)
    for t in range(nt):
        #-- replace fill values before smoothing data
        temp1 = np.zeros((nx, ny))
        #-- reference to first firn field
        temp1[i, j] = fd[VARIABLE][t, i, j] - fd[VARIABLE][0, i, j]
        #-- smooth firn field
        temp2 = scipy.ndimage.gaussian_filter(temp1,
                                              SIGMA,
                                              mode='constant',
                                              cval=0)
        #-- scale output smoothed firn field
        gs[VARIABLE].data[t, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
        #-- replace valid firn values with original
        gs[VARIABLE].data[t, i, j] = temp1[i, j]
        #-- set mask variables for time
        gs[VARIABLE].mask[t, :, :] = (gs['mask'] == 0.0)

    #-- convert projection from input coordinates (EPSG) to model coordinates
    #-- MERRA-2 Hybrid models are rotated pole latitude and longitude
    MODEL_EPSG = set_projection(REGION)
    transformer = pyproj.Transformer.from_crs(pyproj.CRS.from_string(EPSG),
        pyproj.CRS.from_string(MODEL_EPSG), always_xy=True)
    ix, iy = transformer.transform(X, Y)

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    xy1 = np.concatenate((xg[ii, jj, None], yg[ii, jj, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)
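    #-- note: BallTree and KDTree are assumed to be imported from
    #-- sklearn.neighbors, whose query() returns (distances, indices)
    #-- when called with return_distance=True, as unpacked below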

    #-- output interpolated arrays of variable
    npts = len(tdec)
    extrap_data = np.ma.zeros((npts), fill_value=fv, dtype=np.float64)
    extrap_data.mask = np.ones((npts), dtype=bool)
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap_data.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['time'].min()) & (tdec < fd['time'].max())):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['time'].min())
                          & (tdec < fd['time'].max()))
        #-- reduce x, y and t coordinates
        xind, yind, tind = (X[ind], Y[ind], tdec[ind])
        #-- find indices for linearly interpolating in time
        f = scipy.interpolate.interp1d(fd['time'],
                                       np.arange(nt),
                                       kind='linear')
        date_indice = f(tind).astype(int)
        #-- for each unique firn date
        #-- linearly interpolate in time between two firn maps
        #-- then use inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            kk, = np.nonzero(date_indice == k)
            count = np.count_nonzero(date_indice == k)
            #-- query the search tree to find the N closest points
            xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
            dist, indices = tree.query(xy2, k=N, return_distance=True)
            #-- normalized weights if POWER > 0 (typically between 1 and 3)
            #-- in the inverse distance weighting
            power_inverse_distance = dist**(-POWER)
            s = np.sum(power_inverse_distance, axis=1)
            w = power_inverse_distance / np.broadcast_to(
                s[:, None], (count, N))
            #-- firn height or air content for times before and after tdec
            firn1 = gs[VARIABLE][k, ii, jj]
            firn2 = gs[VARIABLE][k + 1, ii, jj]
            #-- linearly interpolate to date
            dt = (tind[kk] - fd['time'][k]) / (fd['time'][k + 1] -
                                               fd['time'][k])
            #-- spatially extrapolate using inverse distance weighting
            extrap_data[kk] = (1.0-dt)*np.sum(w*firn1[indices],axis=1) + \
                dt*np.sum(w*firn2[indices], axis=1)
        #-- set interpolation type (1: interpolated in time)
        extrap_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero(tdec < fd['time'].min())
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before firn model
        ind, = np.nonzero(tdec < fd['time'].min())
        #-- query the search tree to find the N closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=N, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, N))
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        #-- (using NT so the neighbor count N is not overwritten)
        NT = int(10.0 / time_step)
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, NT))
        T = np.zeros((NT))
        #-- create interpolated time series for calculating regression model
        for k in range(NT):
            #-- time at k
            T[k] = fd['time'][k]
            #-- spatially extrapolate firn elevation or air content
            firn1 = gs[VARIABLE][k, ii, jj]
            FIRN[:, k] = np.sum(w * firn1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(
                T,
                FIRN[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[0])
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero(tdec >= fd['time'].max())
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after firn model
        ind, = np.nonzero(tdec >= fd['time'].max())
        #-- query the search tree to find the N closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=N, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, N))
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        #-- (again using NT to avoid clobbering the neighbor count N)
        NT = int(10.0 / time_step)
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, NT))
        T = np.zeros((NT))
        #-- create interpolated time series for calculating regression model
        for k in range(NT):
            kk = nt - NT + k
            #-- time at kk
            T[k] = fd['time'][kk]
            #-- spatially extrapolate firn elevation or air content
            firn1 = gs[VARIABLE][kk, ii, jj]
            FIRN[:, k] = np.sum(w * firn1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(
                T,
                FIRN[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[-1])
        #-- set interpolation type (3: extrapolated forwards in time)
        extrap_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((extrap_data.data == extrap_data.fill_value)
                          | np.isnan(extrap_data.data))
    extrap_data.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        extrap_data.fill_value = FILL_VALUE
        extrap_data.data[extrap_data.mask] = extrap_data.fill_value

    #-- return the interpolated values
    return extrap_data
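
#-- minimal usage sketch (not part of the original source; the region,
#-- directory and query points are placeholder assumptions)
# FAC = extrapolate_merra_hybrid(os.path.expanduser('~/Data'), 'EPSG:3413',
#     'gris', tdec, X, Y, VERSION='v1', VARIABLE='FAC', N=10, POWER=2.0)
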
Example #9
def extrapolate_racmo_firn(base_dir,
                           EPSG,
                           MODEL,
                           tdec,
                           X,
                           Y,
                           SEARCH='BallTree',
                           NN=10,
                           POWER=2.0,
                           SIGMA=1.5,
                           VARIABLE='zs',
                           FILL_VALUE=None,
                           REFERENCE=False):

    #-- set parameters based on input model
    FIRN_FILE = {}
    if (MODEL == 'FGRN11'):
        #-- filename and directory for input FGRN11 file
        FIRN_FILE['zs'] = 'FDM_zs_FGRN11_1960-2016.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_FGRN11_1960-2016.nc'
        FIRN_DIRECTORY = ['RACMO', 'FGRN11_1960-2016']
    elif (MODEL == 'FGRN055'):
        #-- filename and directory for input FGRN055 file
        FIRN_FILE['zs'] = 'FDM_zs_FGRN055_1960-2017_interpol.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_FGRN055_1960-2017_interpol.nc'
        FIRN_DIRECTORY = ['RACMO', 'FGRN055_1960-2017']
    elif (MODEL == 'XANT27'):
        #-- filename and directory for input XANT27 file
        FIRN_FILE['zs'] = 'FDM_zs_ANT27_1979-2016.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_ANT27_1979-2016.nc'
        FIRN_DIRECTORY = ['RACMO', 'XANT27_1979-2016']
    elif (MODEL == 'ASE055'):
        #-- filename and directory for input ASE055 file
        FIRN_FILE['zs'] = 'FDM_zs_ASE055_1979-2015.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_ASE055_1979-2015.nc'
        FIRN_DIRECTORY = ['RACMO', 'ASE055_1979-2015']
    elif (MODEL == 'XPEN055'):
        #-- filename and directory for input XPEN055 file
        FIRN_FILE['zs'] = 'FDM_zs_XPEN055_1979-2016.nc'
        FIRN_FILE['FirnAir'] = 'FDM_FirnAir_XPEN055_1979-2016.nc'
        FIRN_DIRECTORY = ['RACMO', 'XPEN055_1979-2016']

    #-- Open the RACMO NetCDF file for reading
    ddir = os.path.join(base_dir, *FIRN_DIRECTORY)
    fileID = netCDF4.Dataset(os.path.join(ddir, FIRN_FILE[VARIABLE]), 'r')
    #-- Get data from each netCDF variable and remove singleton dimensions
    fd = {}
    fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
    fd['lon'] = fileID.variables['lon'][:, :].copy()
    fd['lat'] = fileID.variables['lat'][:, :].copy()
    fd['time'] = fileID.variables['time'][:].copy()
    #-- invalid data value
    fv = float(fileID.variables[VARIABLE]._FillValue)
    #-- input shape of RACMO firn data
    nt, ny, nx = np.shape(fd[VARIABLE])
    #-- close the NetCDF files
    fileID.close()

    #-- indices of specified ice mask
    i, j = np.nonzero(fd[VARIABLE][0, :, :] != fv)
    #-- create mask object for interpolating data
    fd['mask'] = np.zeros((ny, nx))
    fd['mask'][i, j] = 1.0

    #-- use a gaussian filter to smooth mask
    gs = {}
    gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'],
                                               SIGMA,
                                               mode='constant',
                                               cval=0)
    #-- indices of smoothed ice mask
    ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
    #-- use a gaussian filter to smooth each firn field
    gs[VARIABLE] = np.ma.zeros((nt, ny, nx), fill_value=fv)
    gs[VARIABLE].mask = np.zeros((nt, ny, nx), dtype=bool)
    for t in range(nt):
        #-- replace fill values before smoothing data
        temp1 = np.zeros((ny, nx))
        #-- reference to first firn field
        if REFERENCE:
            temp1[i, j] = fd[VARIABLE][t, i, j] - fd[VARIABLE][0, i, j]
        else:
            temp1[i, j] = fd[VARIABLE][t, i, j].copy()
        #-- smooth firn field
        temp2 = scipy.ndimage.gaussian_filter(temp1,
                                              SIGMA,
                                              mode='constant',
                                              cval=0)
        #-- scale output smoothed firn field
        gs[VARIABLE][t, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
        #-- replace valid firn values with original
        gs[VARIABLE][t, i, j] = temp1[i, j]
        #-- set mask variables for time
        gs[VARIABLE].mask[t, :, :] = (gs['mask'] == 0.0)

    #-- convert RACMO latitude and longitude to input coordinates (EPSG)
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string("epsg:{0:d}".format(4326))
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    direction = pyproj.enums.TransformDirection.INVERSE
    #-- convert projection from model coordinates
    xg, yg = transformer.transform(fd['lon'], fd['lat'], direction=direction)

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    xy1 = np.concatenate((xg[ii, jj, None], yg[ii, jj, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

    #-- output interpolated arrays of firn variable (height or firn air content)
    npts = len(tdec)
    extrap_data = np.ma.zeros((npts), fill_value=fv, dtype=np.float64)
    extrap_data.data[:] = extrap_data.fill_value
    extrap_data.mask = np.zeros((npts), dtype=bool)
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap_data.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['time'].min()) & (tdec < fd['time'].max())):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['time'].min())
                          & (tdec < fd['time'].max()))
        #-- reduce x, y and t coordinates
        xind, yind, tind = (X[ind], Y[ind], tdec[ind])
        #-- find indices for linearly interpolating in time
        f = scipy.interpolate.interp1d(fd['time'],
                                       np.arange(nt),
                                       kind='linear')
        date_indice = f(tind).astype(int)
        #-- for each unique firn date
        #-- linearly interpolate in time between two firn maps
        #-- then use inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            kk, = np.nonzero(date_indice == k)
            count = np.count_nonzero(date_indice == k)
            #-- query the search tree to find the NN closest points
            xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
            dist, indices = tree.query(xy2, k=NN, return_distance=True)
            #-- normalized weights if POWER > 0 (typically between 1 and 3)
            #-- in the inverse distance weighting
            power_inverse_distance = dist**(-POWER)
            s = np.sum(power_inverse_distance, axis=1)
            w = power_inverse_distance / np.broadcast_to(
                s[:, None], (count, NN))
            #-- firn height or air content for times before and after tdec
            firn1 = gs[VARIABLE][k, ii, jj]
            firn2 = gs[VARIABLE][k + 1, ii, jj]
            #-- linearly interpolate to date
            dt = (tind[kk] - fd['time'][k]) / (fd['time'][k + 1] -
                                               fd['time'][k])
            #-- spatially extrapolate using inverse distance weighting
            extrap_data[kk] = (1.0-dt)*np.sum(w*firn1[indices],axis=1) + \
                dt*np.sum(w*firn2[indices], axis=1)
        #-- set interpolation type (1: interpolated in time)
        extrap_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero(tdec < fd['time'].min())
    if (count > 0):
        #-- indices of dates before firn model
        ind, = np.nonzero(tdec < fd['time'].min())
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, NN))
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        N = 365
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, N))
        T = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            T[k] = fd['time'][k]
            #-- spatially extrapolate firn elevation or air content
            firn1 = gs[VARIABLE][k, ii, jj]
            FIRN[:, k] = np.sum(w * firn1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(
                T,
                FIRN[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[0])
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero(tdec >= fd['time'].max())
    if (count > 0):
        #-- indices of dates after firn model
        ind, = np.nonzero(tdec >= fd['time'].max())
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, NN))
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        N = 365
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, N))
        T = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            T[k] = fd['time'][kk]
            #-- spatially extrapolate firn elevation or air content
            firn1 = gs[VARIABLE][kk, ii, jj]
            FIRN[:, k] = np.sum(w * firn1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(
                T,
                FIRN[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[-1])
        #-- set interpolation type (3: extrapolated forward in time)
        extrap_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((extrap_data.data == extrap_data.fill_value)
                          | np.isnan(extrap_data.data))
    extrap_data.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        extrap_data.fill_value = FILL_VALUE
        extrap_data.data[extrap_data.mask] = extrap_data.fill_value

    #-- return the interpolated values
    return extrap_data
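
#-- minimal usage sketch (not part of the original source; all arguments
#-- are placeholder assumptions)
# zs = extrapolate_racmo_firn(os.path.expanduser('~/Data'), 'EPSG:3413',
#     'FGRN055', tdec, X, Y, VARIABLE='zs', NN=10, POWER=2.0, SIGMA=1.5)
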
Example #10
def interpolate_racmo_downscaled(base_dir,
                                 EPSG,
                                 VERSION,
                                 tdec,
                                 X,
                                 Y,
                                 VARIABLE='SMB',
                                 FILL_VALUE=None):

    #-- Full Directory Setup
    DIRECTORY = 'SMB1km_v{0}'.format(VERSION)

    #-- netcdf variable names
    input_products = {}
    input_products['SMB'] = 'SMB_rec'
    input_products['PRECIP'] = 'precip'
    input_products['RUNOFF'] = 'runoff'
    input_products['SNOWMELT'] = 'snowmelt'
    input_products['REFREEZE'] = 'refreeze'
    #-- version 1 was in separate files for each year
    if (VERSION == '1.0'):
        RACMO_MODEL = ['XGRN11', '2.3']
        VARNAME = input_products[VARIABLE]
        SUBDIRECTORY = '{0}_v{1}'.format(VARNAME, VERSION)
        input_dir = os.path.join(base_dir, 'RACMO', DIRECTORY, SUBDIRECTORY)
    elif (VERSION == '2.0'):
        RACMO_MODEL = ['XGRN11', '2.3p2']
        var = input_products[VARIABLE]
        VARNAME = var if VARIABLE in ('SMB',
                                      'PRECIP') else '{0}corr'.format(var)
        input_dir = os.path.join(base_dir, 'RACMO', DIRECTORY)
    elif (VERSION == '3.0'):
        RACMO_MODEL = ['FGRN055', '2.3p2']
        var = input_products[VARIABLE]
        VARNAME = var if (VARIABLE == 'SMB') else '{0}corr'.format(var)
        input_dir = os.path.join(base_dir, 'RACMO', DIRECTORY)
    #-- input cumulative netCDF4 file
    args = (RACMO_MODEL[0], RACMO_MODEL[1], VERSION, VARIABLE)
    input_file = '{0}_RACMO{1}_DS1km_v{2}_{3}_cumul.nc'.format(*args)

    #-- convert projection from input coordinates (EPSG) to model coordinates
    transformer = pyproj.Transformer.from_crs(pyproj.CRS.from_string(EPSG),
        pyproj.CRS.from_epsg(3413), always_xy=True)
    ix, iy = transformer.transform(X, Y)

    #-- Open the RACMO NetCDF file for reading
    fileID = netCDF4.Dataset(os.path.join(input_dir, input_file), 'r')
    #-- input shape of RACMO data
    nt = fileID[VARNAME].shape[0]
    #-- invalid data value
    fv = float(fileID.variables[VARNAME]._FillValue)
    #-- Get data from each netCDF variable and remove singleton dimensions
    d = {}
    #-- cell origins on the bottom right
    dx = np.abs(fileID.variables['x'][1] - fileID.variables['x'][0])
    dy = np.abs(fileID.variables['y'][1] - fileID.variables['y'][0])
    #-- x and y arrays at center of each cell
    d['x'] = fileID.variables['x'][:].copy() - dx / 2.0
    d['y'] = fileID.variables['y'][:].copy() - dy / 2.0
    #-- extract time (decimal years)
    d['TIME'] = fileID.variables['TIME'][:].copy()

    #-- choose a subset of model variables that span the input data
    xr = [ix.min() - dx, ix.max() + dx]
    yr = [iy.min() - dy, iy.max() + dy]
    cols = np.flatnonzero((d['x'] >= xr[0]) & (d['x'] <= xr[1]))
    rows = np.flatnonzero((d['y'] >= yr[0]) & (d['y'] <= yr[1]))
    ny = rows.size
    nx = cols.size
    #-- mask object for interpolating data
    d['MASK'] = np.array(fileID.variables['MASK'][rows, cols], dtype=bool)
    d['x'] = d['x'][cols]
    d['y'] = d['y'][rows]

    #-- check ix and iy against the bounds of valid model points
    #-- (a Delaunay convex hull test, as in the daily interpolators, could be
    #-- used instead; a simple bounding box check is sufficient here)
    valid = (ix >= d['x'].min()) & (ix <= d['x'].max()) & \
        (iy >= d['y'].min()) & (iy <= d['y'].max())

    #-- create an interpolator for the ice mask
    MI = scipy.interpolate.RegularGridInterpolator((d['y'], d['x']), d['MASK'])
    #-- check valid points against the interpolated mask
    valid[valid] = MI.__call__(np.c_[iy[valid], ix[valid]]).astype(bool)

    #-- output interpolated arrays of variable
    npts = len(tdec)
    interp_data = np.ma.zeros((npts), fill_value=fv, dtype=np.float64)
    #-- interpolation mask of invalid values
    interp_data.mask = np.ones((npts), dtype=bool)
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    interp_data.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= d['TIME'].min()) & (tdec <= d['TIME'].max()) & valid):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= d['TIME'].min()) & (tdec <= d['TIME'].max())
                          & valid)
        #-- determine which subset of time to read from the netCDF4 file
        f = scipy.interpolate.interp1d(d['TIME'],
                                       np.arange(nt),
                                       kind='linear',
                                       fill_value=(0, nt - 1),
                                       bounds_error=False)
        date_indice = f(tdec[ind]).astype(int)
        #-- months to read
        months = np.arange(date_indice.min(),
                           np.minimum(date_indice.max() + 2, d['TIME'].size))
        nm = len(months)
        #-- extract variable for months of interest
        d[VARNAME] = np.zeros((nm, ny, nx))
        for i, m in enumerate(months):
            d[VARNAME][i, :, :] = fileID.variables[VARNAME][m, rows,
                                                            cols].copy()

        #-- create an interpolator for variable
        RGI = scipy.interpolate.RegularGridInterpolator(
            (d['TIME'][months], d['y'], d['x']), d[VARNAME])

        #-- interpolate to points
        interp_data.data[ind] = RGI.__call__(np.c_[tdec[ind], iy[ind],
                                                   ix[ind]])
        #-- mask points where the interpolated ice mask is zero (no ice)
        MV = MI.__call__(np.c_[iy[ind], ix[ind]])
        interp_data.mask[ind] = np.logical_not(MV.astype(bool))
        #-- set interpolation type (1: interpolated)
        interp_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero((tdec < d['TIME'].min()) & valid)
    if (count > 0):
        #-- indices of dates before RACMO model
        ind, = np.nonzero((tdec < d['TIME'].min()) & valid)
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        N = 120
        #-- spatially interpolate variable to coordinates
        VAR = np.zeros((count, N))
        T = np.zeros((N))
        #-- spatially interpolate mask to coordinates
        mspl = scipy.interpolate.RectBivariateSpline(d['x'],
                                                     d['y'],
                                                     d['MASK'].T,
                                                     kx=1,
                                                     ky=1)
        #-- mask points where the interpolated ice mask is zero (no ice)
        interp_data.mask[ind] = np.logical_not(
            mspl.ev(ix[ind], iy[ind]).astype(bool))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            T[k] = d['TIME'][k]
            #-- spatially interpolate variable
            spl = scipy.interpolate.RectBivariateSpline(
                d['x'],
                d['y'],
                fileID.variables[VARNAME][k, rows, cols].T,
                kx=1,
                ky=1)
            #-- create numpy masked array of interpolated values
            VAR[:, k] = spl.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp_data.data[v] = regress_model(
                T,
                VAR[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[0])
        #-- set interpolation type (2: extrapolated backward)
        interp_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero((tdec > d['TIME'].max()) & valid)
    if (count > 0):
        #-- indices of dates after RACMO model
        ind, = np.nonzero((tdec > d['TIME'].max()) & valid)
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        N = 120
        #-- spatially interpolate variable to coordinates
        VAR = np.zeros((count, N))
        T = np.zeros((N))
        #-- spatially interpolate mask to coordinates
        mspl = scipy.interpolate.RectBivariateSpline(d['x'],
                                                     d['y'],
                                                     d['MASK'].T,
                                                     kx=1,
                                                     ky=1)
        #-- mask points where the interpolated ice mask is zero (no ice)
        interp_data.mask[ind] = np.logical_not(
            mspl.ev(ix[ind], iy[ind]).astype(bool))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            T[k] = d['TIME'][kk]
            #-- spatially interpolate variable
            spl = scipy.interpolate.RectBivariateSpline(
                d['x'],
                d['y'],
                fileID.variables[VARNAME][kk, rows, cols].T,
                kx=1,
                ky=1)
            #-- create numpy masked array of interpolated values
            VAR[:, k] = spl.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp_data.data[v] = regress_model(
                T,
                VAR[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[-1])
        #-- set interpolation type (3: extrapolated forward)
        interp_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((interp_data.data == interp_data.fill_value)
                          | np.isnan(interp_data.data))
    interp_data.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        interp_data.fill_value = FILL_VALUE
        interp_data.data[interp_data.mask] = interp_data.fill_value

    #-- close the NetCDF files
    fileID.close()

    #-- return the interpolated values
    return interp_data
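
#-- minimal usage sketch (not part of the original source; all arguments
#-- are placeholder assumptions)
# smb = interpolate_racmo_downscaled(os.path.expanduser('~/Data'),
#     'EPSG:3413', '3.0', tdec, X, Y, VARIABLE='SMB')
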
Example #11
def interpolate_mar_daily(DIRECTORY,
                          EPSG,
                          VERSION,
                          tdec,
                          X,
                          Y,
                          XNAME=None,
                          YNAME=None,
                          TIMENAME='TIME',
                          VARIABLE='SMB',
                          SIGMA=1.5,
                          FILL_VALUE=None,
                          EXTRAPOLATE=False):

    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(int)
    EY = np.nanmax(np.floor(tdec)).astype(int)
    YRS = '|'.join(['{0:4d}'.format(YEAR) for YEAR in range(SY, EY + 1)])
    #-- regular expression pattern for MAR dataset
    rx = re.compile(r'{0}-(.*?)-({1})(_subset)?\.nc$'.format(VERSION, YRS))

    #-- MAR model projection: Polar Stereographic (Oblique)
    #-- Earth Radius: 6371229 m
    #-- True Latitude: 0
    #-- Center Longitude: -40
    #-- Center Latitude: 70.5
    proj4_params = ("+proj=sterea +lat_0=+70.5 +lat_ts=0 +lon_0=-40.0 "
                    "+a=6371229 +no_defs")

    #-- create list of files to read
    input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])

    #-- calculate number of time steps to read
    nt = 0
    for f, FILE in enumerate(input_files):
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables[XNAME][:])
            ny = len(fileID.variables[YNAME][:])
            nt += len(fileID.variables[TIMENAME][:])

    #-- python dictionary with file variables
    fd = {}
    fd['TIME'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['CUMULATIVE'] = np.ma.zeros((nt, ny, nx), fill_value=FILL_VALUE)
    gs['CUMULATIVE'].mask = np.ones((nt, ny, nx), dtype=bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for f, FILE in enumerate(input_files):
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            t = len(fileID.variables['TIME'][:])
            #-- create a masked array with all data
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
            fd[VARIABLE].mask = np.zeros((t, ny, nx), dtype=bool)
            #-- surface type
            SRF = fileID.variables['SRF'][:]
            #-- indices of specified ice mask
            i, j = np.nonzero(SRF == 4)
            #-- ice fraction
            FRA = fileID.variables['FRA'][:] / 100.0
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            #-- combine sectors for multi-layered data
            if (np.ndim(tmp) == 4):
                #-- create mask for combining data
                MASK = np.zeros((t, ny, nx))
                MASK[:, i, j] = FRA[:, 0, i, j]
                #-- combine data
                fd[VARIABLE][:] = MASK * tmp[:, 0, :, :] + (
                    1.0 - MASK) * tmp[:, 1, :, :]
            else:
                #-- copy data
                fd[VARIABLE][:] = tmp.copy()
            #-- verify mask object for interpolating data
            surf_mask = np.broadcast_to(SRF, (t, ny, nx))
            fd[VARIABLE].mask[:, :, :] |= (surf_mask != 4)
            #-- combine mask object through time to create a single mask
            fd['MASK'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(
                np.float64)
            #-- MAR coordinates
            fd['LON'] = fileID.variables['LON'][:, :].copy()
            fd['LAT'] = fileID.variables['LAT'][:, :].copy()
            #-- convert x and y coordinates to meters
            fd['x'] = 1000.0 * fileID.variables[XNAME][:].copy()
            fd['y'] = 1000.0 * fileID.variables[YNAME][:].copy()
            #-- extract delta time and epoch of time
            delta_time = fileID.variables[TIMENAME][:].astype(np.float64)
            units = fileID.variables[TIMENAME].units
        #-- convert epoch of time to Julian days
        Y1, M1, D1, h1, m1, s1 = [
            float(d) for d in re.findall(r'\d+\.\d+|\d+', units)
        ]
        epoch_julian = calc_julian_day(Y1,
                                       M1,
                                       D1,
                                       HOUR=h1,
                                       MINUTE=m1,
                                       SECOND=s1)
        #-- calculate time array in Julian days
        Y2, M2, D2, h2, m2, s2 = convert_julian(epoch_julian + delta_time)
        #-- calculate time in year-decimal
        fd['TIME'][c:c + t] = convert_calendar_decimal(Y2,
                                                       M2,
                                                       D2,
                                                       HOUR=h2,
                                                       MINUTE=m2,
                                                       SECOND=s2)
        #-- use a gaussian filter to smooth mask
        gs['MASK'] = scipy.ndimage.gaussian_filter(fd['MASK'],
                                                   SIGMA,
                                                   mode='constant',
                                                   cval=0)
        #-- indices of smoothed ice mask
        ii, jj = np.nonzero(np.ceil(gs['MASK']) == 1.0)
        #-- use a gaussian filter to smooth each model field
        gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
        gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
        #-- for each time
        for tt in range(t):
            #-- replace fill values before smoothing data
            temp1 = np.zeros((ny, nx))
            i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
            temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
            #-- smooth spatial field
            temp2 = scipy.ndimage.gaussian_filter(temp1,
                                                  SIGMA,
                                                  mode='constant',
                                                  cval=0)
            #-- scale output smoothed field
            gs[VARIABLE].data[tt, ii, jj] = temp2[ii, jj] / gs['MASK'][ii, jj]
            #-- replace valid values with original
            gs[VARIABLE].data[tt, i, j] = temp1[i, j]
            #-- set mask variables for time
            gs[VARIABLE].mask[tt, ii, jj] = False
            #-- calculate cumulative
            cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
            gs['CUMULATIVE'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
            gs['CUMULATIVE'].mask[c + tt, ii, jj] = False
        #-- add to counter
        c += t

    #-- convert projection from input coordinates (EPSG) to model coordinates
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string(proj4_params)
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    #-- calculate projected coordinates of input coordinates
    ix, iy = transformer.transform(X, Y)

    #-- check that input points are within convex hull of valid model points
    gs['x'], gs['y'] = np.meshgrid(fd['x'], fd['y'])
    points = np.concatenate((gs['x'][ii, jj, None], gs['y'][ii, jj, None]),
                            axis=1)
    triangle = scipy.spatial.Delaunay(points.data,
                                      qhull_options='Qt Qbb Qc Qz')
    interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
    valid = (triangle.find_simplex(interp_points) >= 0)

    #-- output interpolated arrays of model variable
    npts = len(tdec)
    interp = np.ma.zeros((npts), fill_value=FILL_VALUE, dtype=np.float64)
    interp.mask = np.ones((npts), dtype=bool)
    #-- initially set all values to fill value
    interp.data[:] = interp.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    interp.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['TIME'].min()) & (tdec <= fd['TIME'].max()) & valid):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['TIME'].min())
                          & (tdec <= fd['TIME'].max()) & valid)
        #-- create an interpolator for model variable
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['TIME'], fd['y'], fd['x']), gs['CUMULATIVE'].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['TIME'], fd['y'], fd['x']), gs['CUMULATIVE'].mask)

        #-- interpolate to points
        interp.data[ind] = RGI(np.c_[tdec[ind], iy[ind], ix[ind]])
        interp.mask[ind] = MI(np.c_[tdec[ind], iy[ind], ix[ind]])
        #-- set interpolation type (1: interpolated)
        interp.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero((tdec < fd['TIME'].min()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero((tdec < fd['TIME'].min()) & valid)
        #-- read the first year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['TIME'][k]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs['CUMULATIVE'].data[k, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs['CUMULATIVE'].mask[k, :, :].T, kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[0])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero((tdec > fd['TIME'].max()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after model
        ind, = np.nonzero((tdec > fd['TIME'].max()) & valid)
        #-- read the last year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['TIME'][kk]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(
                fd['x'],
                fd['y'],
                gs['CUMULATIVE'].data[kk, :, :].T,
                kx=1,
                ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(
                fd['x'],
                fd['y'],
                gs['CUMULATIVE'].mask[kk, :, :].T,
                kx=1,
                ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[-1])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((interp.data == interp.fill_value)
                          | np.isnan(interp.data))
    interp.mask[invalid] = True

    #-- return the interpolated values
    return interp
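The smoothing loops above rely on normalized convolution: the zero-filled field and its binary mask are passed through the same Gaussian kernel, and dividing the two restores the correct amplitude along the mask edges. A minimal, self-contained sketch of that technique (array names here are illustrative, not from the library):

import numpy as np
import scipy.ndimage

#-- synthetic field that is only valid inside a rectangular mask
field = np.random.default_rng(0).standard_normal((50, 50))
mask = np.zeros((50, 50))
mask[10:40, 10:40] = 1.0
#-- filter the zero-filled field and the mask with the same kernel
num = scipy.ndimage.gaussian_filter(field * mask, 1.5, mode='constant', cval=0)
den = scipy.ndimage.gaussian_filter(mask, 1.5, mode='constant', cval=0)
#-- the ratio recovers smoothed values unbiased by the zero fill
smoothed = np.divide(num, den, out=np.zeros_like(num), where=(den > 0))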
Example #12
def extrapolate_racmo_daily(base_dir,
                            EPSG,
                            MODEL,
                            tdec,
                            X,
                            Y,
                            VARIABLE='smb',
                            SIGMA=1.5,
                            SEARCH='BallTree',
                            NN=10,
                            POWER=2.0,
                            FILL_VALUE=None,
                            EXTRAPOLATE=False):

    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(int)
    EY = np.nanmax(np.floor(tdec)).astype(int)
    YRS = '|'.join(['{0:4d}'.format(Y) for Y in range(SY, EY + 1)])
    #-- input list of files
    if (MODEL == 'FGRN055'):
        #-- filename and directory for input FGRN055 files
        file_pattern = r'RACMO2\.3p2_FGRN055_{0}_daily_({1})\.nc$'
        DIRECTORY = os.path.join(base_dir, 'RACMO', 'GL', 'RACMO2.3p2_FGRN055')

    #-- create list of files to read
    rx = re.compile(file_pattern.format(VARIABLE, YRS), re.VERBOSE)
    input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])

    #-- calculate number of time steps to read
    nt = 0
    for f, FILE in enumerate(input_files):
        #-- Open the RACMO NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables['rlon'][:])
            ny = len(fileID.variables['rlat'][:])
            nt += len(fileID.variables['time'][:])
            #-- invalid data value
            fv = np.float64(fileID.variables[VARIABLE]._FillValue)

    #-- scaling factor for converting units
    if (VARIABLE == 'hgtsrf'):
        scale_factor = 86400.0
    elif (VARIABLE == 'smb'):
        scale_factor = 1.0

    #-- python dictionary with file variables
    fd = {}
    fd['time'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['cumulative'] = np.ma.zeros((nt, ny, nx), fill_value=fv)
    gs['cumulative'].mask = np.zeros((nt, ny, nx), dtype=bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for f, FILE in enumerate(input_files):
        #-- Open the RACMO NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            t = len(fileID.variables['time'][:])
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=fv)
            fd[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            fd[VARIABLE][:] = scale_factor * tmp
            #-- indices of specified ice mask
            i, j = np.nonzero(tmp[0, :, :] != fv)
            fd[VARIABLE].mask[:, i, j] = False
            #-- combine mask object through time to create a single mask
            fd['mask'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(float)
            #-- racmo coordinates
            fd['lon'] = fileID.variables['lon'][:, :].copy()
            fd['lat'] = fileID.variables['lat'][:, :].copy()
            fd['x'] = fileID.variables['rlon'][:].copy()
            fd['y'] = fileID.variables['rlat'][:].copy()
            #-- rotated pole parameters
            proj4_params = fileID.variables['rotated_pole'].proj4_params
            #-- extract delta time and epoch of time
            delta_time = fileID.variables['time'][:].astype(np.float64)
            units = fileID.variables['time'].units
        #-- convert epoch of time to Julian days
        Y1, M1, D1, h1, m1, s1 = [
            float(d) for d in re.findall(r'\d+\.\d+|\d+', units)
        ]
        epoch_julian = calc_julian_day(Y1,
                                       M1,
                                       D1,
                                       HOUR=h1,
                                       MINUTE=m1,
                                       SECOND=s1)
        #-- calculate time array in Julian days
        Y2, M2, D2, h2, m2, s2 = convert_julian(epoch_julian + delta_time)
        #-- calculate time in year-decimal
        fd['time'][c:c + t] = convert_calendar_decimal(Y2,
                                                       M2,
                                                       D2,
                                                       HOUR=h2,
                                                       MINUTE=m2,
                                                       SECOND=s2)
        #-- use a gaussian filter to smooth mask
        gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'],
                                                   SIGMA,
                                                   mode='constant',
                                                   cval=0)
        #-- indices of smoothed ice mask
        ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
        #-- use a gaussian filter to smooth each model field
        gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=fv)
        gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
        #-- for each time
        for tt in range(t):
            #-- replace fill values before smoothing data
            temp1 = np.zeros((ny, nx))
            i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
            temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
            #-- smooth spatial field
            temp2 = scipy.ndimage.gaussian_filter(temp1,
                                                  SIGMA,
                                                  mode='constant',
                                                  cval=0)
            #-- scale output smoothed field
            gs[VARIABLE][tt, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
            #-- replace valid values with original
            gs[VARIABLE][tt, i, j] = temp1[i, j]
            #-- set mask variables for time
            gs[VARIABLE].mask[tt, ii, jj] = False
            #-- calculate cumulative
            cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
            gs['cumulative'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
            gs['cumulative'].mask[c + tt, ii, jj] = False
        #-- add to counter
        c += t

    #-- convert RACMO latitude and longitude to input coordinates (EPSG)
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_epsg(4326)
    transformer = pyproj.Transformer.from_crs(crs2, crs1, always_xy=True)
    xg, yg = transformer.transform(fd['lon'], fd['lat'])

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    xy1 = np.concatenate((xg[i, j, None], yg[i, j, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

    #-- output interpolated arrays of variable
    npts = len(tdec)
    extrap = np.ma.zeros((npts), fill_value=fv, dtype=np.float64)
    extrap.mask = np.ones((npts), dtype=bool)
    #-- initially set all values to fill value
    extrap.data[:] = extrap.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['time'].min()) & (tdec < fd['time'].max())):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['time'].min())
                          & (tdec < fd['time'].max()))
        #-- reduce x, y and t coordinates
        xind, yind, tind = (X[ind], Y[ind], tdec[ind])
        #-- find indices for linearly interpolating in time
        f = scipy.interpolate.interp1d(fd['time'],
                                       np.arange(nt),
                                       kind='linear')
        date_indice = f(tind).astype(int)
        #-- for each unique racmo date
        #-- linearly interpolate in time between two racmo maps
        #-- then use inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            kk, = np.nonzero(date_indice == k)
            count = np.count_nonzero(date_indice == k)
            #-- query the search tree to find the NN closest points
            xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
            dist, indices = tree.query(xy2, k=NN, return_distance=True)
            #-- normalized weights if POWER > 0 (typically between 1 and 3)
            #-- in the inverse distance weighting
            power_inverse_distance = dist**(-POWER)
            s = np.sum(power_inverse_distance, axis=1)
            w = power_inverse_distance / np.broadcast_to(
                s[:, None], (count, NN))
            #-- variable for times before and after tdec
            var1 = gs['cumulative'][k, i, j]
            var2 = gs['cumulative'][k + 1, i, j]
            #-- linearly interpolate to date
            dt = (tind[kk] - fd['time'][k]) / (fd['time'][k + 1] -
                                               fd['time'][k])
            #-- spatially extrapolate using inverse distance weighting
            extrap[kk] = (1.0-dt)*np.sum(w*var1[indices],axis=1) + \
                dt*np.sum(w*var2[indices], axis=1)
        #-- set interpolation type (1: interpolated in time)
        extrap.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero(tdec < fd['time'].min())
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero(tdec < fd['time'].min())
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, NN))
        #-- read the first year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate variable to coordinates
        DATA = np.zeros((count, N))
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['time'][k]
            #-- spatially extrapolate variable
            tmp = gs['cumulative'][k, i, j]
            DATA[:, k] = np.sum(w * tmp[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap[v] = regress_model(TIME,
                                      DATA[n, :],
                                      tdec[v],
                                      ORDER=2,
                                      CYCLES=[0.25, 0.5, 1.0],
                                      RELATIVE=TIME[0])
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero(tdec >= fd['time'].max())
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after racmo model
        ind, = np.nonzero(tdec >= fd['time'].max())
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, NN))
        #-- read the last year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate variable to coordinates
        DATA = np.zeros((count, N))
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['time'][kk]
            #-- spatially extrapolate variable
            tmp = gs['cumulative'][kk, i, j]
            DATA[:, k] = np.sum(w * tmp[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap[v] = regress_model(TIME,
                                      DATA[n, :],
                                      tdec[v],
                                      ORDER=2,
                                      CYCLES=[0.25, 0.5, 1.0],
                                      RELATIVE=TIME[-1])
        #-- set interpolation type (3: extrapolated forward in time)
        extrap.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((extrap.data == extrap.fill_value)
                          | np.isnan(extrap.data))
    extrap.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        extrap.fill_value = FILL_VALUE
        extrap.data[extrap.mask] = extrap.fill_value

    #-- return the interpolated values
    return extrap
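A hedged usage sketch for extrapolate_racmo_daily: the data directory and query points below are placeholders, and base_dir is assumed to contain the RACMO/GL/RACMO2.3p2_FGRN055 subdirectory the function expects.

import numpy as np

#-- decimal-year epochs and polar stereographic coordinates (EPSG:3413)
tdec = np.array([2015.0, 2015.5, 2016.0])
X = np.array([-200000.0, -150000.0, -100000.0])
Y = np.array([-2000000.0, -2100000.0, -2200000.0])
#-- '/path/to/data' is a placeholder for the actual model archive
smb = extrapolate_racmo_daily('/path/to/data', 'EPSG:3413', 'FGRN055',
    tdec, X, Y, VARIABLE='smb', SIGMA=1.5, SEARCH='BallTree', NN=10,
    POWER=2.0, FILL_VALUE=np.nan, EXTRAPOLATE=True)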
Example #13
def interpolate_mar_daily(DIRECTORY,
                          EPSG,
                          VERSION,
                          tdec,
                          X,
                          Y,
                          XNAME=None,
                          YNAME=None,
                          TIMENAME='TIME',
                          VARIABLE='SMB',
                          SIGMA=1.5,
                          FILL_VALUE=None,
                          EXTRAPOLATE=False):

    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(int)
    EY = np.nanmax(np.floor(tdec)).astype(int)
    YRS = '|'.join(['{0:4d}'.format(Y) for Y in range(SY, EY + 1)])
    #-- regular expression pattern for MAR dataset
    rx = re.compile(r'{0}-(.*?)-({1})(_subset)?\.nc$'.format(VERSION, YRS))

    #-- MAR model projection: Polar Stereographic (Oblique)
    #-- Earth Radius: 6371229 m
    #-- True Latitude: 0
    #-- Center Longitude: -40
    #-- Center Latitude: 70.5
    proj4_params = ("+proj=sterea +lat_0=+70.5 +lat_ts=0 +lon_0=-40.0 "
                    "+a=6371229 +no_defs")

    #-- create list of files to read
    try:
        input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])
    except Exception:
        print(f"failed to find files matching {VERSION} in {DIRECTORY}")
        raise

    #-- calculate number of time steps to read
    nt = 0
    for f, FILE in enumerate(input_files):
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables[XNAME][:])
            ny = len(fileID.variables[YNAME][:])
            TIME = fileID.variables[TIMENAME][:]
            try:
                nt += np.count_nonzero(TIME.data != TIME.fill_value)
            except AttributeError:
                nt += len(TIME)

    #-- python dictionary with file variables
    fd = {}
    fd['TIME'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['CUMULATIVE'] = np.ma.zeros((nt, ny, nx), fill_value=FILL_VALUE)
    gs['CUMULATIVE'].mask = np.ones((nt, ny, nx), dtype=bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for f, FILE in enumerate(input_files):
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            TIME = fileID.variables[TIMENAME][:]
            try:
                t = np.count_nonzero(TIME.data != TIME.fill_value)
            except AttributeError:
                t = len(TIME)
            #-- create a masked array with all data
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
            fd[VARIABLE].mask = np.zeros((t, ny, nx), dtype=bool)
            #-- surface type
            SRF = fileID.variables['SRF'][:]
            #-- indices of specified ice mask
            i, j = np.nonzero(SRF == 4)
            #-- ice fraction
            FRA = fileID.variables['FRA'][:] / 100.0
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            #-- combine sectors for multi-layered data
            if (np.ndim(tmp) == 4):
                #-- create mask for combining data
                MASK = np.zeros((t, ny, nx))
                MASK[:, i, j] = FRA[:t, 0, i, j]
                #-- combine data
                fd[VARIABLE][:] = MASK * tmp[:t, 0, :, :] + (
                    1.0 - MASK) * tmp[:t, 1, :, :]
            else:
                #-- copy data
                fd[VARIABLE][:] = tmp[:t, :, :].copy()
            #-- verify mask object for interpolating data
            surf_mask = np.broadcast_to(SRF, (t, ny, nx))
            fd[VARIABLE].mask = fd[VARIABLE].data == fd[VARIABLE].fill_value
            fd[VARIABLE].mask[:, :, :] |= (surf_mask != 4)
            #-- combine mask object through time to create a single mask
            fd['MASK'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(float)
            #-- MAR coordinates
            fd['LON'] = fileID.variables['LON'][:, :].copy()
            fd['LAT'] = fileID.variables['LAT'][:, :].copy()
            #-- convert x and y coordinates to meters
            fd['x'] = 1000.0 * fileID.variables[XNAME][:].copy()
            fd['y'] = 1000.0 * fileID.variables[YNAME][:].copy()
            #-- extract delta time and epoch of time
            delta_time = fileID.variables[TIMENAME][:t].astype(np.float64)
            date_string = fileID.variables[TIMENAME].units
        #-- extract epoch and units
        epoch, to_secs = SMBcorr.time.parse_date_string(date_string)
        #-- calculate time array in Julian days
        JD = SMBcorr.time.convert_delta_time(delta_time * to_secs,
                                             epoch1=epoch,
                                             epoch2=(1858, 11, 17, 0, 0, 0),
                                             scale=1.0 / 86400.0) + 2400000.5
        #-- convert from Julian days to calendar dates
        YY, MM, DD, hh, mm, ss = SMBcorr.time.convert_julian(JD)
        #-- calculate time in year-decimal
        fd['TIME'][c:c + t] = SMBcorr.time.convert_calendar_decimal(YY,
                                                                    MM,
                                                                    day=DD,
                                                                    hour=hh,
                                                                    minute=mm,
                                                                    second=ss)
        #-- use a gaussian filter to smooth mask
        gs['MASK'] = scipy.ndimage.gaussian_filter(fd['MASK'],
                                                   SIGMA,
                                                   mode='constant',
                                                   cval=0)
        #-- indices of smoothed ice mask
        ii, jj = np.nonzero(np.ceil(gs['MASK']) == 1.0)
        #-- use a gaussian filter to smooth each model field
        gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
        gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
        #-- for each time
        for tt in range(t):
            #-- replace fill values before smoothing data
            temp1 = np.zeros((ny, nx))
            i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
            temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
            #-- smooth spatial field
            temp2 = scipy.ndimage.gaussian_filter(temp1,
                                                  SIGMA,
                                                  mode='constant',
                                                  cval=0)
            #-- scale output smoothed field
            gs[VARIABLE].data[tt, ii, jj] = temp2[ii, jj] / gs['MASK'][ii, jj]
            #-- replace valid values with original
            gs[VARIABLE].data[tt, i, j] = temp1[i, j]
            #-- set mask variables for time
            gs[VARIABLE].mask[tt, ii, jj] = False
            #-- calculate cumulative
            cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
            gs['CUMULATIVE'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
            gs['CUMULATIVE'].mask[c + tt, ii, jj] = False
        #-- add to counter
        c += t

    #-- convert projection from input coordinates (EPSG) to model coordinates
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string(proj4_params)
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    #-- calculate projected coordinates of input coordinates
    ix, iy = transformer.transform(X, Y)

    #-- check that input points are within convex hull of valid model points
    gs['x'], gs['y'] = np.meshgrid(fd['x'], fd['y'])
    v, triangle = find_valid_triangulation(gs['x'][ii, jj], gs['y'][ii, jj])
    #-- check if there is a valid triangulation
    if v:
        #-- check where points are within the convex hull of the triangulation
        interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
        valid = (triangle.find_simplex(interp_points) >= 0)
    else:
        #-- Check ix and iy against the bounds of x and y
        valid = (ix >= fd['x'].min()) & (ix <= fd['x'].max()) & \
            (iy >= fd['y'].min()) & (iy <= fd['y'].max())

    #-- output interpolated arrays of model variable
    npts = len(tdec)
    interp = np.ma.zeros((npts), fill_value=FILL_VALUE, dtype=np.float64)
    interp.mask = np.ones((npts), dtype=bool)
    #-- initially set all values to fill value
    interp.data[:] = interp.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    interp.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['TIME'].min()) & (tdec <= fd['TIME'].max()) & valid):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['TIME'].min())
                          & (tdec <= fd['TIME'].max()) & valid)
        #-- create an interpolator for model variable
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['TIME'], fd['y'], fd['x']), gs['CUMULATIVE'].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['TIME'], fd['y'], fd['x']), gs['CUMULATIVE'].mask)

        #-- interpolate to points
        interp.data[ind] = RGI(np.c_[tdec[ind], iy[ind], ix[ind]])
        interp.mask[ind] = MI(np.c_[tdec[ind], iy[ind], ix[ind]])
        #-- set interpolation type (1: interpolated)
        interp.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero((tdec < fd['TIME'].min()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero((tdec < fd['TIME'].min()) & valid)
        #-- read the first year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['TIME'][k]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs['CUMULATIVE'].data[k, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs['CUMULATIVE'].mask[k, :, :].T, kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[0])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero((tdec > fd['TIME'].max()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after model
        ind, = np.nonzero((tdec > fd['TIME'].max()) & valid)
        #-- read the last year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['TIME'][kk]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(
                fd['x'],
                fd['y'],
                gs['CUMULATIVE'].data[kk, :, :].T,
                kx=1,
                ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(
                fd['x'],
                fd['y'],
                gs['CUMULATIVE'].mask[kk, :, :].T,
                kx=1,
                ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[-1])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((interp.data == interp.fill_value)
                          | np.isnan(interp.data))
    interp.mask[invalid] = True

    #-- return the interpolated values
    return interp
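A hedged usage sketch for interpolate_mar_daily: the directory, version string, and MAR dimension names below are assumptions for illustration and should be matched to the actual files on hand.

import numpy as np

tdec = np.array([2016.25, 2016.75])
X = np.array([-300000.0, -250000.0])
Y = np.array([-1800000.0, -1900000.0])
#-- '/path/to/MAR' and the X/Y dimension names are placeholders
smb = interpolate_mar_daily('/path/to/MAR', 'EPSG:3413', 'MARv3.11',
    tdec, X, Y, XNAME='X10_105', YNAME='Y21_199', TIMENAME='TIME',
    VARIABLE='SMB', SIGMA=1.5, FILL_VALUE=np.nan, EXTRAPOLATE=False)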
Example #14
def extrapolate_merra_hybrid(base_dir,
                             EPSG,
                             REGION,
                             tdec,
                             X,
                             Y,
                             VERSION='v1',
                             VARIABLE='FAC',
                             SEARCH='BallTree',
                             N=10,
                             POWER=2.0,
                             SIGMA=1.5,
                             FILL_VALUE=None,
                             EXTRAPOLATE=False,
                             GZIP=False):

    #-- suffix if compressed
    suffix = '.gz' if GZIP else ''
    #-- set the input netCDF4 file for the variable of interest
    if VARIABLE in ('FAC', 'cum_smb_anomaly', 'SMB_a', 'height', 'h_a'):
        args = (VERSION, REGION.lower(), suffix)
        hybrid_file = 'gsfc_fdm_{0}_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('smb', 'SMB', 'Me', 'Ra', 'Ru', 'Sn-Ev'):
        args = (VERSION, REGION.lower(), suffix)
        hybrid_file = 'gsfc_fdm_smb_{0}_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('Me_a', 'Ra_a', 'Ru_a', 'Sn-Ev_a'):
        args = (VERSION, REGION.lower(), suffix)
        hybrid_file = 'gsfc_fdm_smb_cumul_{0}_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('FAC',) and (VERSION == 'v0'):
        args = ('FAC', REGION.lower(), suffix)
        hybrid_file = 'gsfc_{0}_{1}.nc{2}'.format(*args)
    elif VARIABLE in ('p_minus_e', 'melt') and (VERSION == 'v0'):
        args = (VARIABLE, REGION.lower(), suffix)
        hybrid_file = 'm2_hybrid_{0}_cumul_{1}.nc{2}'.format(*args)

    #-- Open the MERRA-2 Hybrid NetCDF file for reading
    if GZIP:
        #-- read as in-memory (diskless) netCDF4 dataset
        with gzip.open(os.path.join(base_dir, hybrid_file), 'r') as f:
            fileID = netCDF4.Dataset(uuid.uuid4().hex, memory=f.read())
    else:
        #-- read netCDF4 dataset
        fileID = netCDF4.Dataset(os.path.join(base_dir, hybrid_file), 'r')

    #-- Get data from each netCDF variable and remove singleton dimensions
    fd = {}
    #-- time is year decimal at time step 5 days
    time_step = 5.0 / 365.25
    #-- if extrapolating data: read the full dataset
    #-- if simply interpolating with fill values: reduce to a subset
    if EXTRAPOLATE:
        #-- read time variables
        fd['time'] = fileID.variables['time'][:].copy()
        #-- read full dataset and remove singleton dimensions
        fd[VARIABLE] = np.squeeze(fileID.variables[VARIABLE][:].copy())
    else:
        #-- reduce grids to time period of input buffered by time steps
        tmin = np.min(tdec) - 2.0 * time_step
        tmax = np.max(tdec) + 2.0 * time_step
        #-- find indices to times
        nt, = fileID.variables['time'].shape
        f = scipy.interpolate.interp1d(fileID.variables['time'][:],
                                       np.arange(nt),
                                       kind='nearest',
                                       bounds_error=False,
                                       fill_value=(0, nt))
        imin, imax = f((tmin, tmax)).astype(int)
        #-- read reduced time variables
        fd['time'] = fileID.variables['time'][imin:imax + 1].copy()
        #-- read reduced dataset and remove singleton dimensions
        fd[VARIABLE] = np.squeeze(
            fileID.variables[VARIABLE][imin:imax + 1, :, :])
    #-- invalid data value
    fv = np.float64(fileID.variables[VARIABLE]._FillValue)
    #-- input shape of MERRA-2 Hybrid firn data
    nt, nx, ny = np.shape(fd[VARIABLE])
    #-- extract x and y coordinate arrays from grids if applicable
    #-- else create meshgrids of coordinate arrays
    if (np.ndim(fileID.variables['x'][:]) == 2):
        xg = fileID.variables['x'][:].copy()
        yg = fileID.variables['y'][:].copy()
        fd['x'], fd['y'] = (xg[:, 0], yg[0, :])
    else:
        fd['x'] = fileID.variables['x'][:].copy()
        fd['y'] = fileID.variables['y'][:].copy()
        xg, yg = np.meshgrid(fd['x'], fd['y'], indexing='ij')
    #-- close the NetCDF files
    fileID.close()

    #-- indices of specified ice mask
    i, j = np.nonzero(fd[VARIABLE][0, :, :] != fv)
    #-- create mask object for interpolating data
    fd['mask'] = np.zeros((nx, ny))
    fd['mask'][i, j] = 1.0

    #-- use a gaussian filter to smooth mask
    gs = {}
    gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'],
                                               SIGMA,
                                               mode='constant',
                                               cval=0)
    #-- indices of smoothed ice mask
    ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
    #-- use a gaussian filter to smooth each firn field
    gs[VARIABLE] = np.ma.zeros((nt, nx, ny), fill_value=fv)
    gs[VARIABLE].mask = np.zeros((nt, nx, ny), dtype=bool)
    for t in range(nt):
        #-- replace fill values before smoothing data
        temp1 = np.zeros((nx, ny))
        #-- reference to first firn field
        temp1[i, j] = fd[VARIABLE][t, i, j] - fd[VARIABLE][0, i, j]
        #-- smooth firn field
        temp2 = scipy.ndimage.gaussian_filter(temp1,
                                              SIGMA,
                                              mode='constant',
                                              cval=0)
        #-- scale output smoothed firn field
        gs[VARIABLE].data[t, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
        #-- replace valid firn values with original
        gs[VARIABLE].data[t, i, j] = temp1[i, j]
        #-- set mask variables for time
        gs[VARIABLE].mask[t, :, :] = (gs['mask'] == 0.0)

    #-- pyproj transformer for converting to input coordinates (EPSG)
    MODEL_EPSG = set_projection(REGION)
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_string(MODEL_EPSG)
    transformer = pyproj.Transformer.from_crs(crs1, crs2, always_xy=True)
    direction = pyproj.enums.TransformDirection.INVERSE
    #-- convert model coordinate grids from model projection
    xg, yg = transformer.transform(xg, yg, direction=direction)

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    xy1 = np.concatenate((xg[ii, jj, None], yg[ii, jj, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

    #-- output interpolated arrays of variable
    npts = len(tdec)
    extrap_data = np.ma.zeros((npts), fill_value=fv, dtype=np.float64)
    extrap_data.mask = np.ones((npts), dtype=bool)
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap_data.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['time'].min()) & (tdec < fd['time'].max())):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['time'].min())
                          & (tdec < fd['time'].max()))
        #-- reduce x, y and t coordinates
        xind, yind, tind = (X[ind], Y[ind], tdec[ind])
        #-- find indices for linearly interpolating in time
        f = scipy.interpolate.interp1d(fd['time'],
                                       np.arange(nt),
                                       kind='linear')
        date_indice = f(tind).astype(int)
        #-- for each unique firn date
        #-- linearly interpolate in time between two firn maps
        #-- then use inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            kk, = np.nonzero(date_indice == k)
            count = np.count_nonzero(date_indice == k)
            #-- query the search tree to find the N closest points
            xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
            dist, indices = tree.query(xy2, k=N, return_distance=True)
            #-- normalized weights if POWER > 0 (typically between 1 and 3)
            #-- in the inverse distance weighting
            power_inverse_distance = dist**(-POWER)
            s = np.sum(power_inverse_distance, axis=1)
            w = power_inverse_distance / np.broadcast_to(
                s[:, None], (count, N))
            #-- firn height or air content for times before and after tdec
            firn1 = gs[VARIABLE][k, ii, jj]
            firn2 = gs[VARIABLE][k + 1, ii, jj]
            #-- linearly interpolate to date
            dt = (tind[kk] - fd['time'][k]) / (fd['time'][k + 1] -
                                               fd['time'][k])
            #-- spatially extrapolate using inverse distance weighting
            extrap_data[kk] = (1.0-dt)*np.sum(w*firn1[indices],axis=1) + \
                dt*np.sum(w*firn2[indices], axis=1)
        #-- set interpolation type (1: interpolated in time)
        extrap_data.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero(tdec < fd['time'].min())
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before firn model
        ind, = np.nonzero(tdec < fd['time'].min())
        #-- query the search tree to find the N closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=N, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, N))
        #-- calculate a regression model for calculating values
        #-- read first 10 years of data to create regression model
        #-- (use a distinct name so the neighbor count N is not clobbered)
        Nt = int(10.0 / time_step)
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, Nt))
        T = np.zeros((Nt))
        #-- create interpolated time series for calculating regression model
        for k in range(Nt):
            #-- time at k
            T[k] = fd['time'][k]
            #-- spatially extrapolate firn elevation or air content
            firn1 = gs[VARIABLE][k, ii, jj]
            FIRN[:, k] = np.sum(w * firn1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(
                T,
                FIRN[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[0])
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap_data.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero(tdec >= fd['time'].max())
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after firn model
        ind, = np.nonzero(tdec >= fd['time'].max())
        #-- query the search tree to find the N closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=N, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, N))
        #-- calculate a regression model for calculating values
        #-- read last 10 years of data to create regression model
        #-- (again keep the time-step count distinct from the neighbor count)
        Nt = int(10.0 / time_step)
        #-- spatially interpolate firn elevation or air content to coordinates
        FIRN = np.zeros((count, Nt))
        T = np.zeros((Nt))
        #-- create interpolated time series for calculating regression model
        for k in range(Nt):
            kk = nt - Nt + k
            #-- time at kk
            T[k] = fd['time'][kk]
            #-- spatially extrapolate firn elevation or air content
            firn1 = gs[VARIABLE][kk, ii, jj]
            FIRN[:, k] = np.sum(w * firn1[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap_data[v] = regress_model(
                T,
                FIRN[n, :],
                tdec[v],
                ORDER=2,
                CYCLES=[0.25, 0.5, 1.0, 2.0, 4.0, 5.0],
                RELATIVE=T[-1])
        #-- set interpolation type (3: extrapolated forwards in time)
        extrap_data.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((extrap_data.data == extrap_data.fill_value)
                          | np.isnan(extrap_data.data))
    extrap_data.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        extrap_data.fill_value = FILL_VALUE
        extrap_data.data[extrap_data.mask] = extrap_data.fill_value

    #-- return the interpolated values
    return extrap_data
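A hedged usage sketch for extrapolate_merra_hybrid: the base directory and query points are placeholders, base_dir is assumed to hold the gsfc_fdm_* NetCDF files the function looks for, and REGION='gris' follows the lowercase region naming in the file patterns above.

import numpy as np

tdec = np.array([2012.0, 2013.5, 2021.0])
X = np.array([-150000.0, -100000.0, -50000.0])
Y = np.array([-2200000.0, -2250000.0, -2300000.0])
#-- '/path/to/merra-hybrid' is a placeholder for the firn model archive
FAC = extrapolate_merra_hybrid('/path/to/merra-hybrid', 'EPSG:3413', 'gris',
    tdec, X, Y, VERSION='v1', VARIABLE='FAC', SEARCH='BallTree', N=10,
    POWER=2.0, SIGMA=1.5, FILL_VALUE=np.nan, EXTRAPOLATE=True)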