Example #1
def yearly_file_mean(input_dir, VERSION, RACMO_MODEL, PRODUCT, START, END,
                     GZIP):
    #-- names within netCDF4 files
    VARIABLE = input_products[PRODUCT]
    #-- find input files for years of interest
    regex_years = '|'.join('{0:4d}'.format(Y) for Y in range(START, END + 1))
    pattern = '{0}.({1}).BN_\d+_\d+_1km.MM.nc'.format(VARIABLE, regex_years)
    rx = re.compile(pattern, re.VERBOSE)
    input_files = sorted([fi for fi in os.listdir(input_dir) if rx.match(fi)])
    #-- number of input files
    n_files = len(input_files)
    #-- input dimensions and counter variable
    #-- get dimensions for input VERSION
    nt, ny, nx = get_dimensions(input_dir, VERSION, PRODUCT, GZIP=GZIP)
    #-- create counter variable
    c = 0
    #-- allocate for all data
    dinput = {}
    dinput['LON'] = np.zeros((ny, nx))
    dinput['LAT'] = np.zeros((ny, nx))
    dinput['x'] = np.zeros((nx))
    dinput['y'] = np.zeros((ny))
    dinput['MASK'] = np.zeros((ny, nx), dtype=np.int8)
    #-- calculate total
    dinput[VARIABLE] = np.zeros((ny, nx))
    #-- calendar year and month
    year = np.zeros((nt))
    month = np.zeros((nt))

    #-- for each file of interest
    for t in range(n_files):
        #-- Open the NetCDF file for reading
        fileID = netCDF4.Dataset(os.path.join(input_dir, input_files[t]), 'r')
        #-- Getting the data from each netCDF variable
        dinput['LON'][:, :] = fileID.variables['LON'][:, :].copy()
        dinput['LAT'][:, :] = fileID.variables['LAT'][:, :].copy()
        dinput['x'][:] = fileID.variables['x'][:].copy()
        dinput['y'][:] = fileID.variables['y'][:].copy()
        dinput['MASK'][:, :] = fileID.variables['icemask'][:, :].astype(
            np.int8)
        #-- get year from file
        year[c], = np.array(rx.findall(input_files[t]), dtype=np.float)
        #-- for each month
        for m in range(12):
            #-- calendar month
            month[c] = np.float(m + 1)
            #-- read product of interest and add to total
            dinput[VARIABLE] += fileID.variables[VARIABLE][m, :, :].copy()
            #-- add to counter
            c += 1
        #-- close the NetCDF file
        fileID.close()

    #-- calculate mean time over period
    dinput['TIME'] = np.mean(convert_calendar_decimal(year, month))
    #-- convert from total to mean
    dinput[VARIABLE] /= np.float(c)

    #-- return the mean variables
    return dinput
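
The year filter above is built from a regular-expression alternation; a minimal, self-contained sketch of how that pattern selects files for the requested years (the filenames and year range below are illustrative only):

import re

#-- hypothetical monthly filenames (illustrative only)
files = ['smb.1961.BN_1958_2016_1km.MM.nc',
         'smb.1995.BN_1958_2016_1km.MM.nc']
#-- same construction as in yearly_file_mean for START=1961, END=1990
regex_years = '|'.join('{0:4d}'.format(Y) for Y in range(1961, 1991))
rx = re.compile(r'smb\.({0})\.BN_\d+_\d+_1km\.MM\.nc'.format(regex_years))
print([f for f in files if rx.match(f)])
#-- only the 1961 file falls within the requested year range
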
Example #2
def yearly_file_cumulative(input_dir,
                           VERSION,
                           RACMO_MODEL,
                           PRODUCT,
                           MEAN,
                           GZIP=False,
                           VERBOSE=False,
                           MODE=0o775):
    #-- names within netCDF4 files
    VARIABLE = input_products[PRODUCT]
    #-- find input files for years of interest
    pattern = '{0}.(\d+).BN_\d+_\d+_1km.MM.nc'.format(VARIABLE)
    rx = re.compile(pattern, re.VERBOSE)
    input_files = sorted([fi for fi in os.listdir(input_dir) if rx.match(fi)])
    #-- number of input files
    n_files = len(input_files)
    #-- input dimensions and counter variable
    #-- get dimensions for input VERSION
    nt, ny, nx = get_dimensions(input_dir, VERSION, VARIABLE, GZIP=GZIP)
    #-- create counter variable
    c = 0
    #-- allocate for all data
    dinput = {}
    dinput['LON'] = np.zeros((ny, nx))
    dinput['LAT'] = np.zeros((ny, nx))
    dinput['x'] = np.zeros((nx))
    dinput['y'] = np.zeros((ny))
    dinput['TIME'] = np.zeros((nt))
    dinput['MASK'] = np.zeros((ny, nx), dtype=np.int8)
    dinput[VARIABLE] = np.zeros((nt, ny, nx))
    CUMULATIVE = np.zeros((ny, nx))
    #-- calendar year and month
    year = np.zeros((nt))
    month = np.zeros((nt))

    #-- for each file of interest
    for t in range(n_files):
        #-- Open the NetCDF file for reading
        fileID = netCDF4.Dataset(os.path.join(input_dir, input_files[t]), 'r')
        #-- Getting the data from each netCDF variable
        dinput['LON'][:, :] = fileID.variables['LON'][:, :].copy()
        dinput['LAT'][:, :] = fileID.variables['LAT'][:, :].copy()
        dinput['x'][:] = fileID.variables['x'][:].copy()
        dinput['y'][:] = fileID.variables['y'][:].copy()
        dinput['MASK'][:, :] = fileID.variables['icemask'][:, :].astype(
            np.int8)
        #-- get year from file
        year[c], = np.array(rx.findall(input_files[t]), dtype=np.float)
        #-- for each month
        for m in range(12):
            #-- calendar month
            month[c] = np.float(m + 1)
            #-- convert to decimal format (using mid-month values)
            dinput['TIME'][c] = convert_calendar_decimal(year[c], month[c])
            #-- extract data and add to total cumulative matrix
            CUMULATIVE += (fileID.variables[VARIABLE][m, :, :].copy() - MEAN)
            dinput[VARIABLE][c, :, :] = CUMULATIVE.copy()
            #-- add to counter
            c += 1
        #-- close the NetCDF file
        fileID.close()

    #-- return the cumulative anomalies
    return dinput
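
At a single grid cell, the running total in yearly_file_cumulative reduces to a cumulative sum of monthly anomalies; a toy sketch with made-up numbers:

import numpy as np

#-- made-up monthly values at one grid cell and a long-term monthly mean
monthly = np.array([10.0, 12.0, 8.0, 11.0])
MEAN = 10.0
#-- equivalent to the running CUMULATIVE += (value - MEAN) loop above
CUMULATIVE = np.cumsum(monthly - MEAN)
print(CUMULATIVE)
#-- [0. 2. 0. 1.]
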
Example #3
def compressed_file_cumulative(input_dir,
                               VERSION,
                               RACMO_MODEL,
                               PRODUCT,
                               MEAN,
                               GZIP=False,
                               VERBOSE=False,
                               MODE=0o775):
    #-- names within netCDF4 files
    VARIABLE = input_products[PRODUCT]
    #-- variable of interest
    if (PRODUCT == 'SMB') or ((PRODUCT == 'PRECIP') and (VERSION == '2.0')):
        VARNAME = VARIABLE
    else:
        VARNAME = '{0}corr'.format(VARIABLE)

    #-- if reading bytes from compressed file or netcdf file directly
    gz = '.gz' if GZIP else ''
    #-- allocate for all data
    dinput = {}

    #-- input area file with ice mask and model topography
    f1 = 'Icemask_Topo_Iceclasses_lon_lat_average_1km_GrIS.nc{0}'.format(gz)
    if GZIP:
        #-- read bytes from compressed file
        fd = gzip.open(os.path.join(input_dir, f1), 'rb')
        #-- read netCDF file for topography and ice classes from bytes
        fileID = netCDF4.Dataset(uuid.uuid4().hex, mode='r', memory=fd.read())
    else:
        #-- read netCDF file for topography and ice classes
        fileID = netCDF4.Dataset(os.path.join(input_dir, f1), mode='r')
    #-- Getting the data from each netCDF variable
    dinput['LON'] = np.array(fileID.variables['LON'][:, :])
    dinput['LAT'] = np.array(fileID.variables['LAT'][:, :])
    dinput['x'] = np.array(fileID.variables['x'][:])
    dinput['y'] = np.array(fileID.variables['y'][:])
    promicemask = np.array(fileID.variables['Promicemask'][:, :])
    topography = np.array(fileID.variables['Topography'][:, :])
    #-- close the compressed file objects
    fd.close() if GZIP else fileID.close()

    #-- file format for each version
    file_format = {}
    file_format['2.0'] = '{0}.1958-2016.BN_RACMO2.3p2_FGRN11_GrIS.MM.nc{1}'
    file_format['3.0'] = '{0}.1958-2018.BN_RACMO2.3p2_FGRN055_GrIS.MM.nc{1}'

    #-- input dataset for variable
    f2 = file_format[VERSION].format(VARIABLE.lower(), gz)
    if GZIP:
        #-- read bytes from compressed file
        fd = gzip.open(os.path.join(input_dir, f2), 'rb')
        #-- read netCDF file for dataset from bytes
        fileID = netCDF4.Dataset(uuid.uuid4().hex, mode='r', memory=fd.read())
    else:
        #-- read netCDF file for dataset (could also set memory=None)
        fileID = netCDF4.Dataset(os.path.join(input_dir, f2), mode='r')
    #-- shape of the input data matrix
    nt, ny, nx = fileID.variables[VARNAME].shape

    #-- find valid ice sheet points from promicemask
    ii, jj = np.nonzero((promicemask >= 1) & (promicemask <= 3))
    dinput['MASK'] = np.zeros((ny, nx), dtype=np.int8)
    dinput['MASK'][ii, jj] = 1

    #-- dates in year-decimal format
    dinput['TIME'] = np.zeros((nt))
    #-- calculate cumulative
    CUMULATIVE = np.zeros((ny, nx))
    dinput[VARNAME] = np.zeros((nt, ny, nx))
    #-- calculate dates
    #-- Months since 1958-01-15 at 00:00:00
    itime = np.array(fileID.variables['time'][:])
    year = np.zeros((nt))
    month = np.zeros((nt))
    for t in range(nt):
        #-- divide t by 12 to get the year
        year[t] = 1958 + np.floor(t / 12.0)
        #-- use the modulus operator to get the month
        month[t] = (t % 12) + 1
        #-- convert to decimal format (using mid-month values)
        dinput['TIME'][t] = convert_calendar_decimal(year[t], month[t])
        #-- extract data and add to total cumulative matrix
        CUMULATIVE += (fileID.variables[VARNAME][t, :, :].copy() - MEAN)
        dinput[VARNAME][t, :, :] = CUMULATIVE.copy()

    #-- close the compressed file objects
    fd.close() if GZIP else fileID.close()

    #-- return the cumulative anomalies
    return dinput
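
The time axis in compressed_file_cumulative is decoded from a monthly index assumed to start in January 1958; a short check of that arithmetic (the indices are illustrative):

import numpy as np

#-- decode a monthly time index t into calendar year and month,
#-- assuming the axis starts in January 1958 as in the loop above
for t in [0, 11, 12, 700]:
    year = 1958 + np.floor(t / 12.0)
    month = (t % 12) + 1
    print(t, int(year), int(month))
#-- 0 -> 1958 1, 11 -> 1958 12, 12 -> 1959 1, 700 -> 2016 5
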
Example #4
def extrapolate_mar_daily(DIRECTORY,
                          EPSG,
                          VERSION,
                          tdec,
                          X,
                          Y,
                          XNAME=None,
                          YNAME=None,
                          TIMENAME='TIME',
                          VARIABLE='SMB',
                          SIGMA=1.5,
                          SEARCH='BallTree',
                          NN=10,
                          POWER=2.0,
                          FILL_VALUE=None,
                          EXTRAPOLATE=False):

    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(np.int)
    EY = np.nanmax(np.floor(tdec)).astype(np.int)
    YRS = '|'.join(['{0:4d}'.format(Y) for Y in range(SY, EY + 1)])
    #-- regular expression pattern for MAR dataset
    rx = re.compile(r'{0}-(.*?)-({1})(_subset)?.nc$'.format(VERSION, YRS))

    #-- create list of files to read
    input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])

    #-- calculate number of time steps to read
    nt = 0
    for f, FILE in enumerate(input_files):
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables[XNAME][:])
            ny = len(fileID.variables[YNAME][:])
            TIME = fileID.variables[TIMENAME][:]
            try:
                nt += np.count_nonzero(TIME.data != TIME.fill_value)
            except AttributeError:
                nt += len(TIME)

    #-- python dictionary with file variables
    fd = {}
    fd['TIME'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['CUMULATIVE'] = np.ma.zeros((nt, ny, nx), fill_value=FILL_VALUE)
    gs['CUMULATIVE'].mask = np.ones((nt, ny, nx), dtype=np.bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for f, FILE in enumerate(input_files):
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            TIME = fileID.variables['TIME'][:]
            try:
                t = np.count_nonzero(TIME.data != TIME.fill_value)
            except AttributeError:
                t = len(TIME)
            #-- create a masked array with all data
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
            fd[VARIABLE].mask = np.zeros((t, ny, nx), dtype=np.bool)
            #-- surface type
            SRF = fileID.variables['SRF'][:]
            #-- indices of specified ice mask
            i, j = np.nonzero(SRF == 4)
            #-- ice fraction
            FRA = fileID.variables['FRA'][:] / 100.0
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            #-- combine sectors for multi-layered data
            if (np.ndim(tmp) == 4):
                #-- create mask for combining data
                MASK = np.zeros((t, ny, nx))
                MASK[:, i, j] = FRA[:t, 0, i, j]
                #-- combine data
                fd[VARIABLE][:] = MASK * tmp[:t, 0, :, :] + (
                    1.0 - MASK) * tmp[:t, 1, :, :]
            else:
                #-- copy data
                fd[VARIABLE][:] = tmp[:t, :, :].copy()
            #-- verify mask object for interpolating data
            surf_mask = np.broadcast_to(SRF, (t, ny, nx))
            fd[VARIABLE].mask[:, :, :] |= (surf_mask != 4)
            #-- combine mask object through time to create a single mask
            fd[VARIABLE].mask = fd[VARIABLE].data == fd[VARIABLE].fill_value
            fd['MASK'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(
                np.float)
            #-- MAR coordinates
            fd['LON'] = fileID.variables['LON'][:, :].copy()
            fd['LAT'] = fileID.variables['LAT'][:, :].copy()
            #-- convert x and y coordinates to meters
            fd['x'] = 1000.0 * fileID.variables[XNAME][:].copy()
            fd['y'] = 1000.0 * fileID.variables[YNAME][:].copy()
            #-- extract delta time and epoch of time
            delta_time = fileID.variables[TIMENAME][:t].astype(np.float)
            units = fileID.variables[TIMENAME].units
        #-- convert epoch of time to Julian days
        Y1, M1, D1, h1, m1, s1 = [
            float(d) for d in re.findall('\d+\.\d+|\d+', units)
        ]
        epoch_julian = calc_julian_day(Y1,
                                       M1,
                                       D1,
                                       HOUR=h1,
                                       MINUTE=m1,
                                       SECOND=s1)
        #-- calculate time array in Julian days
        Y2, M2, D2, h2, m2, s2 = convert_julian(epoch_julian + delta_time)
        #-- calculate time in year-decimal
        fd['TIME'][c:c + t] = convert_calendar_decimal(Y2,
                                                       M2,
                                                       D2,
                                                       HOUR=h2,
                                                       MINUTE=m2,
                                                       SECOND=s2)
        #-- use a gaussian filter to smooth mask
        gs['MASK'] = scipy.ndimage.gaussian_filter(fd['MASK'],
                                                   SIGMA,
                                                   mode='constant',
                                                   cval=0)
        #-- indices of smoothed ice mask
        ii, jj = np.nonzero(np.ceil(gs['MASK']) == 1.0)
        #-- use a gaussian filter to smooth each model field
        gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
        gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=np.bool)
        #-- for each time
        for tt in range(t):
            #-- replace fill values before smoothing data
            temp1 = np.zeros((ny, nx))
            i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
            temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
            #-- smooth spatial field
            temp2 = scipy.ndimage.gaussian_filter(temp1,
                                                  SIGMA,
                                                  mode='constant',
                                                  cval=0)
            #-- scale output smoothed field
            gs[VARIABLE].data[tt, ii, jj] = temp2[ii, jj] / gs['MASK'][ii, jj]
            #-- replace valid values with original
            gs[VARIABLE].data[tt, i, j] = temp1[i, j]
            #-- set mask variables for time
            gs[VARIABLE].mask[tt, ii, jj] = False
            #-- calculate cumulative
            cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
            gs['CUMULATIVE'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
            gs['CUMULATIVE'].mask[c + tt, ii, jj] = False
        #-- add to counter
        c += t

    #-- convert MAR latitude and longitude to input coordinates (EPSG)
    proj1 = pyproj.Proj("+init={0}".format(EPSG))
    proj2 = pyproj.Proj("+init=EPSG:{0:d}".format(4326))
    xg, yg = pyproj.transform(proj2, proj1, fd['LON'], fd['LAT'])

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    xy1 = np.concatenate((xg[i, j, None], yg[i, j, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

    #-- output interpolated arrays of output variable
    npts = len(tdec)
    extrap = np.ma.zeros((npts), fill_value=FILL_VALUE, dtype=np.float)
    extrap.mask = np.ones((npts), dtype=np.bool)
    #-- initially set all values to fill value
    extrap.data[:] = extrap.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['TIME'].min()) & (tdec < fd['TIME'].max())):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['TIME'].min())
                          & (tdec < fd['TIME'].max()))
        #-- reduce x, y and t coordinates
        xind, yind, tind = (X[ind], Y[ind], tdec[ind])
        #-- find indices for linearly interpolating in time
        f = scipy.interpolate.interp1d(fd['TIME'],
                                       np.arange(nt),
                                       kind='linear')
        date_indice = f(tind).astype(np.int)
        #-- for each unique model date
        #-- linearly interpolate in time between two model maps
        #-- then use inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            kk, = np.nonzero(date_indice == k)
            count = np.count_nonzero(date_indice == k)
            #-- query the search tree to find the NN closest points
            xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
            dist, indices = tree.query(xy2, k=NN, return_distance=True)
            #-- normalized weights if POWER > 0 (typically between 1 and 3)
            #-- in the inverse distance weighting
            power_inverse_distance = dist**(-POWER)
            s = np.sum(power_inverse_distance, axis=1)
            w = power_inverse_distance / np.broadcast_to(
                s[:, None], (count, NN))
            #-- variable for times before and after tdec
            var1 = gs['CUMULATIVE'][k, i, j]
            var2 = gs['CUMULATIVE'][k + 1, i, j]
            #-- linearly interpolate to date
            dt = (tind[kk] - fd['TIME'][k]) / (fd['TIME'][k + 1] -
                                               fd['TIME'][k])
            #-- spatially extrapolate using inverse distance weighting
            extrap.data[kk] = (1.0-dt)*np.sum(w*var1[indices],axis=1) + \
                dt*np.sum(w*var2[indices], axis=1)
        #-- set interpolation type (1: interpolated in time)
        extrap.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero(tdec < fd['TIME'].min())
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero(tdec < fd['TIME'].min())
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, NN))
        #-- read the first year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate variable to coordinates
        DATA = np.zeros((count, N))
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['TIME'][k]
            #-- spatially extrapolate variable
            tmp = gs['CUMULATIVE'][k, i, j]
            DATA[:, k] = np.sum(w * tmp[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[0])
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero(tdec >= fd['TIME'].max())
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after model
        ind, = np.nonzero(tdec >= fd['TIME'].max())
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, NN))
        #-- read the last year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate variable to coordinates
        DATA = np.zeros((count, N))
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['TIME'][kk]
            #-- spatially extrapolate variable
            tmp = gs['CUMULATIVE'][kk, i, j]
            DATA[:, k] = np.sum(w * tmp[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[-1])
        #-- set interpolation type (3: extrapolated forward in time)
        extrap.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((extrap.data == extrap.fill_value)
                          | np.isnan(extrap.data))
    extrap.mask[invalid] = True

    #-- return the interpolated values
    return extrap
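
The spatial step in extrapolate_mar_daily weights the NN nearest model nodes by inverse distance; a minimal sketch of that weight normalization (the distances are made up, POWER=2.0 matches the function default):

import numpy as np

#-- made-up distances from one output point to its NN=3 nearest model nodes
dist = np.array([[1.0, 2.0, 4.0]])
POWER = 2.0
power_inverse_distance = dist**(-POWER)
#-- normalize so the weights for each output point sum to one
w = power_inverse_distance / power_inverse_distance.sum(axis=1, keepdims=True)
print(w, w.sum(axis=1))
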
Example #5
def interpolate_racmo_daily(base_dir,
                            EPSG,
                            MODEL,
                            tdec,
                            X,
                            Y,
                            VARIABLE='smb',
                            SIGMA=1.5,
                            FILL_VALUE=None,
                            EXTRAPOLATE=False):

    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(np.int)
    EY = np.nanmax(np.floor(tdec)).astype(np.int)
    YRS = '|'.join(['{0:4d}'.format(Y) for Y in range(SY, EY + 1)])
    #-- input list of files
    if (MODEL == 'FGRN055'):
        #-- filename and directory for input FGRN055 files
        file_pattern = 'RACMO2.3p2_FGRN055_{0}_daily_({1}).nc'
        DIRECTORY = os.path.join(base_dir, 'RACMO', 'GL', 'RACMO2.3p2_FGRN055')

    #-- create list of files to read
    rx = re.compile(file_pattern.format(VARIABLE, YRS), re.VERBOSE)
    input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])

    #-- calculate number of time steps to read
    nt = 0
    for f, FILE in enumerate(input_files):
        #-- Open the RACMO NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables['rlon'][:])
            ny = len(fileID.variables['rlat'][:])
            nt += len(fileID.variables['time'][:])
            #-- invalid data value
            fv = np.float(fileID.variables[VARIABLE]._FillValue)

    #-- scaling factor for converting units
    if (VARIABLE == 'hgtsrf'):
        scale_factor = 86400.0
    elif (VARIABLE == 'smb'):
        scale_factor = 1.0

    #-- python dictionary with file variables
    fd = {}
    fd['time'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['cumulative'] = np.ma.zeros((nt, ny, nx), fill_value=fv)
    gs['cumulative'].mask = np.zeros((nt, ny, nx), dtype=np.bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for f, FILE in enumerate(input_files):
        #-- Open the RACMO NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            t = len(fileID.variables['time'][:])
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=fv)
            fd[VARIABLE].mask = np.ones((t, ny, nx), dtype=np.bool)
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            fd[VARIABLE][:] = scale_factor * tmp
            #-- indices of specified ice mask
            i, j = np.nonzero(tmp[0, :, :] != fv)
            fd[VARIABLE].mask[:, i, j] = False
            #-- combine mask object through time to create a single mask
            fd['mask'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(
                np.float)
            #-- racmo coordinates
            fd['lon'] = fileID.variables['lon'][:, :].copy()
            fd['lat'] = fileID.variables['lat'][:, :].copy()
            fd['x'] = fileID.variables['rlon'][:].copy()
            fd['y'] = fileID.variables['rlat'][:].copy()
            #-- rotated pole parameters
            proj4_params = fileID.variables['rotated_pole'].proj4_params
            #-- extract delta time and epoch of time
            delta_time = fileID.variables['time'][:].astype(np.float)
            units = fileID.variables['time'].units
        #-- convert epoch of time to Julian days
        Y1, M1, D1, h1, m1, s1 = [
            float(d) for d in re.findall('\d+\.\d+|\d+', units)
        ]
        epoch_julian = calc_julian_day(Y1,
                                       M1,
                                       D1,
                                       HOUR=h1,
                                       MINUTE=m1,
                                       SECOND=s1)
        #-- calculate time array in Julian days
        Y2, M2, D2, h2, m2, s2 = convert_julian(epoch_julian + delta_time)
        #-- calculate time in year-decimal
        fd['time'][c:c + t] = convert_calendar_decimal(Y2,
                                                       M2,
                                                       D2,
                                                       HOUR=h2,
                                                       MINUTE=m2,
                                                       SECOND=s2)
        #-- use a gaussian filter to smooth mask
        gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'],
                                                   SIGMA,
                                                   mode='constant',
                                                   cval=0)
        #-- indices of smoothed ice mask
        ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
        #-- use a gaussian filter to smooth each model field
        gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=fv)
        gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=np.bool)
        #-- for each time
        for tt in range(t):
            #-- replace fill values before smoothing data
            temp1 = np.zeros((ny, nx))
            i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
            temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
            #-- smooth spatial field
            temp2 = scipy.ndimage.gaussian_filter(temp1,
                                                  SIGMA,
                                                  mode='constant',
                                                  cval=0)
            #-- scale output smoothed field
            gs[VARIABLE][tt, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
            #-- replace valid values with original
            gs[VARIABLE][tt, i, j] = temp1[i, j]
            #-- set mask variables for time
            gs[VARIABLE].mask[tt, ii, jj] = False
            #-- calculate cumulative
            cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
            gs['cumulative'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
            gs['cumulative'].mask[c + tt, ii, jj] = False
        #-- add to counter
        c += t

    #-- convert projection from input coordinates (EPSG) to model coordinates
    #-- RACMO models are rotated pole latitude and longitude
    proj1 = pyproj.Proj("+init={0}".format(EPSG))
    proj2 = pyproj.Proj(proj4_params)
    #-- calculate rotated pole coordinates of input coordinates
    ix, iy = pyproj.transform(proj1, proj2, X, Y)

    #-- check that input points are within convex hull of valid model points
    gs['x'], gs['y'] = np.meshgrid(fd['x'], fd['y'])
    v, triangle = find_valid_triangulation(gs['x'][ii, jj], gs['y'][ii, jj])
    #-- check where points are within the convex hull of the triangulation
    if v:
        interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
        valid = (triangle.find_simplex(interp_points) >= 0)
    else:
        #-- Check ix and iy against the bounds of x and y
        valid = (ix >= fd['x'].min()) & (ix <= fd['x'].max()) & \
            (iy >= fd['y'].min()) & (iy <= fd['y'].max())

    #-- output interpolated arrays of model variable
    npts = len(tdec)
    interp = np.ma.zeros((npts), fill_value=fv, dtype=np.float)
    interp.mask = np.ones((npts), dtype=np.bool)
    #-- initially set all values to fill value
    interp.data[:] = interp.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    interp.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['time'].min()) & (tdec <= fd['time'].max()) & valid):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['time'].min())
                          & (tdec <= fd['time'].max()) & valid)
        #-- create an interpolator for model variable
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['y'], fd['x']), gs['cumulative'].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['time'], fd['y'], fd['x']), gs['cumulative'].mask)

        #-- interpolate to points
        interp.data[ind] = RGI.__call__(np.c_[tdec[ind], iy[ind], ix[ind]])
        interp.mask[ind] = MI.__call__(np.c_[tdec[ind], iy[ind], ix[ind]])
        #-- set interpolation type (1: interpolated)
        interp.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero((tdec < fd['time'].min()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero((tdec < fd['time'].min()) & valid)
        #-- read the first year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=np.bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['time'][k]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs['cumulative'].data[k, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs['cumulative'].mask[k, :, :].T, kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[0])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero((tdec > fd['time'].max()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after model
        ind, = np.nonzero((tdec > fd['time'].max()) & valid)
        #-- read the last year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=np.bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['time'][kk]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(
                fd['x'],
                fd['y'],
                gs['cumulative'].data[kk, :, :].T,
                kx=1,
                ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(
                fd['x'],
                fd['y'],
                gs['cumulative'].mask[kk, :, :].T,
                kx=1,
                ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[-1])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((interp.data == interp.fill_value)
                          | np.isnan(interp.data))
    interp.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        interp.fill_value = FILL_VALUE
        interp.data[interp.mask] = interp.fill_value

    #-- return the interpolated values
    return interp
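
interpolate_racmo_daily evaluates the cumulative anomalies with a trilinear RegularGridInterpolator over (time, y, x); a tiny self-contained sketch of that call on synthetic data:

import numpy as np
import scipy.interpolate

#-- synthetic (time, y, x) cube standing in for the cumulative anomalies
time = np.array([2000.0, 2000.5, 2001.0])
y = np.array([0.0, 1.0])
x = np.array([0.0, 1.0, 2.0])
cube = np.arange(3 * 2 * 3, dtype=np.float64).reshape(3, 2, 3)
RGI = scipy.interpolate.RegularGridInterpolator((time, y, x), cube)
#-- evaluate at one (time, y, x) point, as with np.c_[tdec, iy, ix] above
print(RGI(np.array([[2000.25, 0.5, 1.0]])))
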
Example #6
def mar_smb_cumulative(input_dir, VERSION, PRODUCT, RANGE=[1961,1990],
    DOWNSCALED=False, VERBOSE=False, MODE=0o775):

    #-- regular expression pattern for MAR dataset
    rx = re.compile('MAR{0}-monthly-(.*?)-(\d+).nc$'.format(VERSION))
    #-- netCDF4 variable names (for both direct and derived products)
    input_products = {}
    #-- SMB from downscaled product
    if DOWNSCALED:
        #-- variable coordinates
        XNAME,YNAME,TIMENAME = ('x','y','time')
        #-- SMBcorr is topography corrected SMB for the ice covered area
        #-- SMB2 is the SMB for the tundra covered area
        input_products['SMB'] = ['SMBcorr','SMB2']
        #-- RU from downscaled product
        #-- RUcorr is topography corrected runoff for the ice covered area
        #-- RU2corr is topography corrected runoff for the tundra covered area
        input_products['RUNOFF'] = ['RUcorr','RU2corr']
        input_products['PRECIP'] = ['RF','SF']
        input_products['SNOWFALL'] = 'SF'
        #-- ME from downscaled product
        #-- MEcorr is topography corrected melt
        input_products['SNOWMELT'] = 'MEcorr'
        input_products['SUBLIM'] = 'SU'
        input_products['REFREEZE'] = ['MEcorr','RUcorr','RU2corr']
        input_products['RAINFALL'] = 'RF'
        #-- downscaled projection: WGS84/NSIDC Sea Ice Polar Stereographic North
        proj4_params = "+init=EPSG:{0:d}".format(3413)
    else:
        #-- variable coordinates
        XNAME,YNAME,TIMENAME = ('X10_105','Y21_199','TIME')
        #-- SMB is SMB for the ice covered area
        input_products['SMB'] = 'SMB'
        #-- RU is runoff for the ice covered area
        #-- RU2 is runoff for the tundra covered area
        input_products['RUNOFF'] = ['RU','RU2']
        input_products['PRECIP'] = ['RF','SF']
        input_products['SNOWFALL'] = 'SF'
        input_products['SNOWMELT'] = 'ME'
        input_products['SUBLIM'] = 'SU'
        input_products['REFREEZE'] = 'RZ'
        input_products['RAINFALL'] = 'RF'
        #-- MAR model projection: Polar Stereographic (Oblique)
        #-- Earth Radius: 6371229 m
        #-- True Latitude: 0
        #-- Center Longitude: -40
        #-- Center Latitude: 70.5
        proj4_params = ("+proj=sterea +lat_0=+70.5 +lat_ts=0 +lon_0=-40.0 "
            "+a=6371229 +no_defs")

    #-- create flag to differentiate between direct and derived products
    if (np.ndim(input_products[PRODUCT]) == 0):
        #-- direct products
        derived_product = False
    else:
        #-- derived products
        derived_product = True

    #-- Open the NetCDF4 file for reading
    mean_filename = 'MAR_{0}_{1}_mean_{2:4.0f}-{3:4.0f}.nc'
    MEAN_FILE = mean_filename.format(VERSION,PRODUCT,RANGE[0],RANGE[1])
    with netCDF4.Dataset(os.path.join(input_dir,MEAN_FILE), 'r') as fileID:
        MEAN = fileID.variables[PRODUCT][:,:].copy()

    #-- output subdirectory
    output_sub = 'MAR_{0}_{1}_cumul'
    output_dir = os.path.join(input_dir,output_sub.format(VERSION,PRODUCT))
    os.makedirs(output_dir,MODE) if not os.access(output_dir,os.F_OK) else None
    #-- output netCDF4 title format
    TITLE = 'Cumulative_anomalies_relative_to_{0:4d}-{1:4d}_Mean'

    #-- find input files
    input_files=sort_files(rx,[f for f in os.listdir(input_dir) if rx.match(f)])
    #-- input dimensions and counter variable
    #-- get dimensions for input dataset
    ny,nx = get_dimensions(input_dir,input_files,XNAME,YNAME)
    #-- allocate for all data
    CUMUL = {}
    CUMUL['LON'] = np.zeros((ny,nx))
    CUMUL['LAT'] = np.zeros((ny,nx))
    CUMUL['VALID'] = np.zeros((ny,nx),dtype=np.bool)
    CUMUL['x'] = np.zeros((nx))
    CUMUL['y'] = np.zeros((ny))
    #-- calculate cumulative anomalies
    CUMUL[PRODUCT] = np.ma.zeros((ny,nx),fill_value=-9999.0)
    CUMUL[PRODUCT].mask = np.ones((ny,nx),dtype=np.bool)
    #-- input monthly data
    MONTH = {}
    MONTH['MASK'] = np.zeros((ny,nx))

    #-- for each file
    for t,input_file in enumerate(input_files):
        #-- Open the NetCDF file for reading
        fileID = netCDF4.Dataset(os.path.join(input_dir,input_file), 'r')
        #-- Getting the data from each netCDF variable
        #-- latitude and longitude
        CUMUL['LON'][:,:] = fileID.variables['LON'][:,:].copy()
        CUMUL['LAT'][:,:] = fileID.variables['LAT'][:,:].copy()
        #-- extract model x and y
        CUMUL['x'][:] = fileID.variables[XNAME][:].copy()
        CUMUL['y'][:] = fileID.variables[YNAME][:].copy()
        #-- get reanalysis and year from file
        reanalysis,year = rx.findall(input_file).pop()
        #-- convert from months since year start to calendar month
        months = fileID.variables[TIMENAME][:].copy() + 1.0
        #-- read land/ice mask
        LAND_MASK = fileID.variables['MSK'][:,:].copy()
        #-- finding valid points only from land mask
        iy,ix = np.nonzero(LAND_MASK > 1)
        CUMUL['VALID'][iy,ix] = True
        CUMUL[PRODUCT].mask[iy,ix] = False
        #-- read downscaled masks
        if DOWNSCALED:
            #-- read glacier and ice sheet mask (tundra=1, permanent ice=2)
            MASK_MAR = fileID.variables['MSK_MAR'][:,:].copy()
            SURF_MAR = fileID.variables['SRF_MAR'][:,:].copy()
            iy,ix = np.nonzero((SURF_MAR >= 0.0) & (LAND_MASK > 1))
            MONTH['MASK'][iy,ix] = MASK_MAR[iy,ix]
        else:
            MONTH['MASK'][iy,ix] = 2.0

        #-- invalid value from MAR product
        FILL_VALUE = fileID.variables['SMB']._FillValue

        #-- for each month
        for m,mon in enumerate(months):
            #-- convert calendar year and month to time in decimal format
            CUMUL['TIME'] = convert_calendar_decimal(np.float(year),mon)
            #-- read each product of interest contained within the dataset
            #-- read variables for both direct and derived products
            if derived_product:
                for p in input_products[PRODUCT]:
                    MONTH[p] = fileID.variables[p][m,:,:].copy()
            else:
                p = input_products[PRODUCT]
                MONTH[PRODUCT] = fileID.variables[p][m,:,:].copy()

            #-- calculate derived products
            if (PRODUCT == 'PRECIP'):
                #-- PRECIP = SNOWFALL + RAINFALL
                MONTH['PRECIP'] = MONTH['SF'] + MONTH['RF']
            elif (PRODUCT == 'REFREEZE') and DOWNSCALED:
                #-- runoff from permanent ice covered regions and tundra regions
                RU1,RU2 = input_products['RUNOFF']
                ME = input_products['SNOWMELT']
                MONTH['RUNOFF'] = (MONTH['MASK'] - 1.0)*MONTH[RU1] + \
                    (2.0 - MONTH['MASK'])*MONTH[RU2]
                #-- REFREEZE = (total) SNOWMELT - RUNOFF
                MONTH['REFREEZE'] = MONTH[ME] - MONTH['RUNOFF']
            elif (PRODUCT == 'RUNOFF'):
                #-- runoff from permanent ice covered regions and tundra regions
                RU1,RU2 = input_products['RUNOFF']
                MONTH['RUNOFF'] = (MONTH['MASK'] - 1.0)*MONTH[RU1] + \
                    (2.0 - MONTH['MASK'])*MONTH[RU2]
            elif (PRODUCT == 'SMB'):
                #-- SMB from permanent ice covered regions and tundra regions
                SMB1,SMB2 = input_products['SMB']
                MONTH['SMB'] = (MONTH['MASK'] - 1.0)*MONTH[SMB1] + \
                    (2.0 - MONTH['MASK'])*MONTH[SMB2]

            #-- calculate cumulative for each time step
            CUMUL[PRODUCT].data[iy,ix] += MONTH[PRODUCT][iy,ix] - MEAN[iy,ix]
            #-- replace masked values with fill value
            CUMUL[PRODUCT].data[CUMUL[PRODUCT].mask] = CUMUL[PRODUCT].fill_value
            #-- output netCDF4 filename
            args = (VERSION, PRODUCT, year, mon)
            cumul_file = 'MAR_{0}_{1}_cumul_{2}_{3:02.0f}.nc'.format(*args)
            create_netCDF4(CUMUL, FILENAME=os.path.join(output_dir,cumul_file),
                UNITS='mmWE', LONGNAME=longname[PRODUCT], VARNAME=PRODUCT,
                LONNAME='LON', LATNAME='LAT', XNAME='x', YNAME='y',
                TIMENAME='TIME', MASKNAME='VALID', VERBOSE=VERBOSE,
                PROJECTION=proj4_params, TITLE=TITLE.format(RANGE[0],RANGE[1]))
            #-- change the permissions mode
            os.chmod(os.path.join(output_dir,cumul_file),MODE)

        #-- close the netcdf file
        fileID.close()
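
For the downscaled MAR products, ice-sheet and tundra fields are blended with the mask weights (MASK - 1) and (2 - MASK), where MASK is 1 over tundra and 2 over permanent ice; a toy sketch with made-up values:

import numpy as np

#-- MASK = 1 (tundra), 2 (permanent ice), and an intermediate value
MASK = np.array([1.0, 2.0, 1.5])
ICE = np.array([100.0, 100.0, 100.0])
TUNDRA = np.array([40.0, 40.0, 40.0])
#-- same blend as MONTH['SMB'] above: ICE where MASK=2, TUNDRA where MASK=1
blend = (MASK - 1.0) * ICE + (2.0 - MASK) * TUNDRA
print(blend)
#-- [ 40. 100.  70.]
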
Example #7
def interpolate_mar_daily(DIRECTORY,
                          EPSG,
                          VERSION,
                          tdec,
                          X,
                          Y,
                          XNAME=None,
                          YNAME=None,
                          TIMENAME='TIME',
                          VARIABLE='SMB',
                          SIGMA=1.5,
                          FILL_VALUE=None,
                          EXTRAPOLATE=False):

    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(np.int)
    EY = np.nanmax(np.floor(tdec)).astype(np.int)
    YRS = '|'.join(['{0:4d}'.format(Y) for Y in range(SY, EY + 1)])
    #-- regular expression pattern for MAR dataset
    rx = re.compile(r'{0}-(.*?)-({1})(_subset)?.nc$'.format(VERSION, YRS))

    #-- MAR model projection: Polar Stereographic (Oblique)
    #-- Earth Radius: 6371229 m
    #-- True Latitude: 0
    #-- Center Longitude: -40
    #-- Center Latitude: 70.5
    proj4_params = ("+proj=sterea +lat_0=+70.5 +lat_ts=0 +lon_0=-40.0 "
                    "+a=6371229 +no_defs")

    #-- create list of files to read
    input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])

    #-- calculate number of time steps to read
    nt = 0
    for f, FILE in enumerate(input_files):
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables[XNAME][:])
            ny = len(fileID.variables[YNAME][:])
            nt += len(fileID.variables[TIMENAME][:])

    #-- python dictionary with file variables
    fd = {}
    fd['TIME'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['CUMULATIVE'] = np.ma.zeros((nt, ny, nx), fill_value=FILL_VALUE)
    gs['CUMULATIVE'].mask = np.ones((nt, ny, nx), dtype=np.bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for f, FILE in enumerate(input_files):
        #-- Open the MAR NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            t = len(fileID.variables['TIME'][:])
            #-- create a masked array with all data
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
            fd[VARIABLE].mask = np.zeros((t, ny, nx), dtype=np.bool)
            #-- surface type
            SRF = fileID.variables['SRF'][:]
            #-- indices of specified ice mask
            i, j = np.nonzero(SRF == 4)
            #-- ice fraction
            FRA = fileID.variables['FRA'][:] / 100.0
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            #-- combine sectors for multi-layered data
            if (np.ndim(tmp) == 4):
                #-- create mask for combining data
                MASK = np.zeros((t, ny, nx))
                MASK[:, i, j] = FRA[:t, 0, i, j]
                #-- combine data
                fd[VARIABLE][:] = MASK * tmp[:t, 0, :, :] + (
                    1.0 - MASK) * tmp[:t, 1, :, :]
            else:
                #-- copy data
                fd[VARIABLE][:] = tmp.copy()
            #-- verify mask object for interpolating data
            surf_mask = np.broadcast_to(SRF, (t, ny, nx))
            fd[VARIABLE].mask[:, :, :] |= (surf_mask != 4)
            #-- combine mask object through time to create a single mask
            fd['MASK'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(
                np.float)
            #-- MAR coordinates
            fd['LON'] = fileID.variables['LON'][:, :].copy()
            fd['LAT'] = fileID.variables['LAT'][:, :].copy()
            #-- convert x and y coordinates to meters
            fd['x'] = 1000.0 * fileID.variables[XNAME][:].copy()
            fd['y'] = 1000.0 * fileID.variables[YNAME][:].copy()
            #-- extract delta time and epoch of time
            delta_time = fileID.variables[TIMENAME][:].astype(np.float)
            units = fileID.variables[TIMENAME].units
        #-- convert epoch of time to Julian days
        Y1, M1, D1, h1, m1, s1 = [
            float(d) for d in re.findall('\d+\.\d+|\d+', units)
        ]
        epoch_julian = calc_julian_day(Y1,
                                       M1,
                                       D1,
                                       HOUR=h1,
                                       MINUTE=m1,
                                       SECOND=s1)
        #-- calculate time array in Julian days
        Y2, M2, D2, h2, m2, s2 = convert_julian(epoch_julian + delta_time)
        #-- calculate time in year-decimal
        fd['TIME'][c:c + t] = convert_calendar_decimal(Y2,
                                                       M2,
                                                       D2,
                                                       HOUR=h2,
                                                       MINUTE=m2,
                                                       SECOND=s2)
        #-- use a gaussian filter to smooth mask
        gs['MASK'] = scipy.ndimage.gaussian_filter(fd['MASK'],
                                                   SIGMA,
                                                   mode='constant',
                                                   cval=0)
        #-- indices of smoothed ice mask
        ii, jj = np.nonzero(np.ceil(gs['MASK']) == 1.0)
        #-- use a gaussian filter to smooth each model field
        gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=FILL_VALUE)
        gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=np.bool)
        #-- for each time
        for tt in range(t):
            #-- replace fill values before smoothing data
            temp1 = np.zeros((ny, nx))
            i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
            temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
            #-- smooth spatial field
            temp2 = scipy.ndimage.gaussian_filter(temp1,
                                                  SIGMA,
                                                  mode='constant',
                                                  cval=0)
            #-- scale output smoothed field
            gs[VARIABLE].data[tt, ii, jj] = temp2[ii, jj] / gs['MASK'][ii, jj]
            #-- replace valid values with original
            gs[VARIABLE].data[tt, i, j] = temp1[i, j]
            #-- set mask variables for time
            gs[VARIABLE].mask[tt, ii, jj] = False
            #-- calculate cumulative
            cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
            gs['CUMULATIVE'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
            gs['CUMULATIVE'].mask[c + tt, ii, jj] = False
        #-- add to counter
        c += t

    #-- convert projection from input coordinates (EPSG) to model coordinates
    proj1 = pyproj.Proj("+init={0}".format(EPSG))
    proj2 = pyproj.Proj(proj4_params)
    #-- calculate projected coordinates of input coordinates
    ix, iy = pyproj.transform(proj1, proj2, X, Y)

    #-- check that input points are within convex hull of valid model points
    gs['x'], gs['y'] = np.meshgrid(fd['x'], fd['y'])
    points = np.concatenate((gs['x'][ii, jj, None], gs['y'][ii, jj, None]),
                            axis=1)
    triangle = scipy.spatial.Delaunay(points.data,
                                      qhull_options='Qt Qbb Qc Qz')
    interp_points = np.concatenate((ix[:, None], iy[:, None]), axis=1)
    valid = (triangle.find_simplex(interp_points) >= 0)

    #-- output interpolated arrays of model variable
    npts = len(tdec)
    interp = np.ma.zeros((npts), fill_value=FILL_VALUE, dtype=np.float)
    interp.mask = np.ones((npts), dtype=np.bool)
    #-- initially set all values to fill value
    interp.data[:] = interp.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    interp.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['TIME'].min()) & (tdec <= fd['TIME'].max()) & valid):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['TIME'].min())
                          & (tdec <= fd['TIME'].max()) & valid)
        #-- create an interpolator for model variable
        RGI = scipy.interpolate.RegularGridInterpolator(
            (fd['TIME'], fd['y'], fd['x']), gs['CUMULATIVE'].data)
        #-- create an interpolator for input mask
        MI = scipy.interpolate.RegularGridInterpolator(
            (fd['TIME'], fd['y'], fd['x']), gs['CUMULATIVE'].mask)

        #-- interpolate to points
        interp.data[ind] = RGI.__call__(np.c_[tdec[ind], iy[ind], ix[ind]])
        interp.mask[ind] = MI.__call__(np.c_[tdec[ind], iy[ind], ix[ind]])
        #-- set interpolation type (1: interpolated)
        interp.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero((tdec < fd['TIME'].min()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero((tdec < fd['TIME'].min()) & valid)
        #-- read the first year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=np.bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['TIME'][k]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs['CUMULATIVE'].data[k, :, :].T, kx=1, ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(
                fd['x'], fd['y'], gs['CUMULATIVE'].mask[k, :, :].T, kx=1, ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[0])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (2: extrapolated backward)
        interp.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero((tdec > fd['TIME'].max()) & valid)
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after model
        ind, = np.nonzero((tdec > fd['TIME'].max()) & valid)
        #-- read the last year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate model variable to coordinates
        DATA = np.zeros((count, N))
        MASK = np.zeros((count, N), dtype=np.bool)
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['TIME'][kk]
            #-- spatially interpolate model variable
            S1 = scipy.interpolate.RectBivariateSpline(
                fd['x'],
                fd['y'],
                gs['CUMULATIVE'].data[kk, :, :].T,
                kx=1,
                ky=1)
            S2 = scipy.interpolate.RectBivariateSpline(
                fd['x'],
                fd['y'],
                gs['CUMULATIVE'].mask[kk, :, :].T,
                kx=1,
                ky=1)
            #-- create numpy masked array of interpolated values
            DATA[:, k] = S1.ev(ix[ind], iy[ind])
            MASK[:, k] = S2.ev(ix[ind], iy[ind])
        #-- calculate regression model
        for n, v in enumerate(ind):
            interp.data[v] = regress_model(TIME,
                                           DATA[n, :],
                                           tdec[v],
                                           ORDER=2,
                                           CYCLES=[0.25, 0.5, 1.0],
                                           RELATIVE=TIME[-1])
        #-- mask any invalid points
        interp.mask[ind] = np.any(MASK, axis=1)
        #-- set interpolation type (3: extrapolated forward)
        interp.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((interp.data == interp.fill_value)
                          | np.isnan(interp.data))
    interp.mask[invalid] = True

    #-- return the interpolated values
    return interp
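#-- note: both extrapolation branches above call a regress_model helper that
#-- is defined elsewhere in this collection. As a rough, hypothetical sketch
#-- of the call signature used here (polynomial of degree ORDER plus periodic
#-- terms with periods CYCLES, in years), a least-squares stand-in could look
#-- like the following; the project's own regress_model may differ in detail.
import numpy as np

def regress_model_sketch(t_in, d_in, t_out, ORDER=2, CYCLES=[0.25, 0.5, 1.0],
                         RELATIVE=0.0):
    #-- time relative to the chosen epoch
    u_in = np.atleast_1d(t_in) - RELATIVE
    u_out = np.atleast_1d(t_out) - RELATIVE
    #-- design matrix: polynomial terms followed by sine/cosine pairs
    def design(u):
        cols = [u**n for n in range(ORDER + 1)]
        for c in CYCLES:
            cols.append(np.sin(2.0 * np.pi * u / c))
            cols.append(np.cos(2.0 * np.pi * u / c))
        return np.column_stack(cols)
    #-- least-squares fit to the input series and evaluation at t_out
    beta, *_ = np.linalg.lstsq(design(u_in), np.atleast_1d(d_in), rcond=None)
    return design(u_out).dot(beta)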
Example #8
0
def extrapolate_racmo_daily(base_dir,
                            EPSG,
                            MODEL,
                            tdec,
                            X,
                            Y,
                            VARIABLE='smb',
                            SIGMA=1.5,
                            SEARCH='BallTree',
                            NN=10,
                            POWER=2.0,
                            FILL_VALUE=None,
                            EXTRAPOLATE=False):

    #-- start and end years to read
    SY = np.nanmin(np.floor(tdec)).astype(int)
    EY = np.nanmax(np.floor(tdec)).astype(int)
    YRS = '|'.join(['{0:4d}'.format(Y) for Y in range(SY, EY + 1)])
    #-- input list of files
    if (MODEL == 'FGRN055'):
        #-- filename and directory for input FGRN055 files
        file_pattern = 'RACMO2.3p2_FGRN055_{0}_daily_({1}).nc'
        DIRECTORY = os.path.join(base_dir, 'RACMO', 'GL', 'RACMO2.3p2_FGRN055')

    #-- create list of files to read
    rx = re.compile(file_pattern.format(VARIABLE, YRS), re.VERBOSE)
    input_files = sorted([f for f in os.listdir(DIRECTORY) if rx.match(f)])

    #-- calculate number of time steps to read
    nt = 0
    for f, FILE in enumerate(input_files):
        #-- Open the RACMO NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            nx = len(fileID.variables['rlon'][:])
            ny = len(fileID.variables['rlat'][:])
            nt += len(fileID.variables['time'][:])
            #-- invalid data value
            fv = float(fileID.variables[VARIABLE]._FillValue)

    #-- scaling factor for converting units
    if (VARIABLE == 'hgtsrf'):
        scale_factor = 86400.0
    elif (VARIABLE == 'smb'):
        scale_factor = 1.0

    #-- python dictionary with file variables
    fd = {}
    fd['time'] = np.zeros((nt))
    #-- python dictionary with gaussian filtered variables
    gs = {}
    #-- calculate cumulative sum of gaussian filtered values
    cumulative = np.zeros((ny, nx))
    gs['cumulative'] = np.ma.zeros((nt, ny, nx), fill_value=fv)
    gs['cumulative'].mask = np.zeros((nt, ny, nx), dtype=bool)
    #-- create a counter variable for filling variables
    c = 0
    #-- for each file in the list
    for f, FILE in enumerate(input_files):
        #-- Open the RACMO NetCDF file for reading
        with netCDF4.Dataset(os.path.join(DIRECTORY, FILE), 'r') as fileID:
            #-- number of time variables within file
            t = len(fileID.variables['time'][:])
            fd[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=fv)
            fd[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
            #-- Get data from netCDF variable and remove singleton dimensions
            tmp = np.squeeze(fileID.variables[VARIABLE][:])
            fd[VARIABLE][:] = scale_factor * tmp
            #-- indices of specified ice mask
            i, j = np.nonzero(tmp[0, :, :] != fv)
            fd[VARIABLE].mask[:, i, j] = False
            #-- combine mask object through time to create a single mask
            fd['mask'] = 1.0 - np.any(fd[VARIABLE].mask, axis=0).astype(
                np.float64)
            #-- racmo coordinates
            fd['lon'] = fileID.variables['lon'][:, :].copy()
            fd['lat'] = fileID.variables['lat'][:, :].copy()
            fd['x'] = fileID.variables['rlon'][:].copy()
            fd['y'] = fileID.variables['rlat'][:].copy()
            #-- rotated pole parameters
            proj4_params = fileID.variables['rotated_pole'].proj4_params
            #-- extract delta time and epoch of time
            delta_time = fileID.variables['time'][:].astype(np.float64)
            units = fileID.variables['time'].units
        #-- convert epoch of time to Julian days
        Y1, M1, D1, h1, m1, s1 = [
            float(d) for d in re.findall('\d+\.\d+|\d+', units)
        ]
        epoch_julian = calc_julian_day(Y1,
                                       M1,
                                       D1,
                                       HOUR=h1,
                                       MINUTE=m1,
                                       SECOND=s1)
        #-- calculate time array in Julian days
        Y2, M2, D2, h2, m2, s2 = convert_julian(epoch_julian + delta_time)
        #-- calculate time in year-decimal
        fd['time'][c:c + t] = convert_calendar_decimal(Y2,
                                                       M2,
                                                       D2,
                                                       HOUR=h2,
                                                       MINUTE=m2,
                                                       SECOND=s2)
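        #-- (equivalently, netCDF4.num2date(delta_time, units) would give the
        #-- corresponding calendar dates in place of the julian-day helpers)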
        #-- use a gaussian filter to smooth mask
        gs['mask'] = scipy.ndimage.gaussian_filter(fd['mask'],
                                                   SIGMA,
                                                   mode='constant',
                                                   cval=0)
        #-- indices of smoothed ice mask
        ii, jj = np.nonzero(np.ceil(gs['mask']) == 1.0)
        #-- use a gaussian filter to smooth each model field
        gs[VARIABLE] = np.ma.zeros((t, ny, nx), fill_value=fv)
        gs[VARIABLE].mask = np.ones((t, ny, nx), dtype=bool)
        #-- for each time
        for tt in range(t):
            #-- replace fill values before smoothing data
            temp1 = np.zeros((ny, nx))
            i, j = np.nonzero(~fd[VARIABLE].mask[tt, :, :])
            temp1[i, j] = fd[VARIABLE][tt, i, j].copy()
            #-- smooth spatial field
            temp2 = scipy.ndimage.gaussian_filter(temp1,
                                                  SIGMA,
                                                  mode='constant',
                                                  cval=0)
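            #-- note: dividing the smoothed field by the smoothed mask below
            #-- renormalizes the gaussian filter near the ice margin, where
            #-- part of the kernel overlaps masked (non-ice) cells set to zero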
            #-- scale output smoothed field
            gs[VARIABLE][tt, ii, jj] = temp2[ii, jj] / gs['mask'][ii, jj]
            #-- replace valid values with original
            gs[VARIABLE][tt, i, j] = temp1[i, j]
            #-- set mask variables for time
            gs[VARIABLE].mask[tt, ii, jj] = False
            #-- calculate cumulative
            cumulative[ii, jj] += gs[VARIABLE][tt, ii, jj]
            gs['cumulative'].data[c + tt, ii, jj] = np.copy(cumulative[ii, jj])
            gs['cumulative'].mask[c + tt, ii, jj] = False
        #-- add to counter
        c += t

    #-- convert RACMO latitude and longitude to input coordinates (EPSG)
    crs1 = pyproj.CRS.from_string(EPSG)
    crs2 = pyproj.CRS.from_epsg(4326)
    transformer = pyproj.Transformer.from_crs(crs2, crs1, always_xy=True)
    xg, yg = transformer.transform(fd['lon'], fd['lat'])

    #-- construct search tree from original points
    #-- can use either BallTree or KDTree algorithms
    xy1 = np.concatenate((xg[i, j, None], yg[i, j, None]), axis=1)
    tree = BallTree(xy1) if (SEARCH == 'BallTree') else KDTree(xy1)

    #-- output interpolated arrays of variable
    npts = len(tdec)
    extrap = np.ma.zeros((npts), fill_value=fv, dtype=np.float64)
    extrap.mask = np.ones((npts), dtype=bool)
    #-- initially set all values to fill value
    extrap.data[:] = extrap.fill_value
    #-- type designating algorithm used (1:interpolate, 2:backward, 3:forward)
    extrap.interpolation = np.zeros((npts), dtype=np.uint8)

    #-- find days that can be interpolated
    if np.any((tdec >= fd['time'].min()) & (tdec < fd['time'].max())):
        #-- indices of dates for interpolated days
        ind, = np.nonzero((tdec >= fd['time'].min())
                          & (tdec < fd['time'].max()))
        #-- reduce x, y and t coordinates
        xind, yind, tind = (X[ind], Y[ind], tdec[ind])
        #-- find indices for linearly interpolating in time
        f = scipy.interpolate.interp1d(fd['time'],
                                       np.arange(nt),
                                       kind='linear')
        date_indice = f(tind).astype(int)
        #-- for each unique racmo date
        #-- linearly interpolate in time between two racmo maps
        #-- then use inverse distance weighting to extrapolate in space
        for k in np.unique(date_indice):
            kk, = np.nonzero(date_indice == k)
            count = np.count_nonzero(date_indice == k)
            #-- query the search tree to find the NN closest points
            xy2 = np.concatenate((xind[kk, None], yind[kk, None]), axis=1)
            dist, indices = tree.query(xy2, k=NN, return_distance=True)
            #-- normalized weights if POWER > 0 (typically between 1 and 3)
            #-- in the inverse distance weighting
            power_inverse_distance = dist**(-POWER)
            s = np.sum(power_inverse_distance, axis=1)
            w = power_inverse_distance / np.broadcast_to(
                s[:, None], (count, NN))
            #-- variable for times before and after tdec
            var1 = gs['cumulative'][k, i, j]
            var2 = gs['cumulative'][k + 1, i, j]
            #-- linearly interpolate to date
            dt = (tind[kk] - fd['time'][k]) / (fd['time'][k + 1] -
                                               fd['time'][k])
            #-- spatially extrapolate using inverse distance weighting
            extrap[kk] = (1.0 - dt)*np.sum(w*var1[indices], axis=1) + \
                dt*np.sum(w*var2[indices], axis=1)
        #-- set interpolation type (1: interpolated in time)
        extrap.interpolation[ind] = 1

    #-- check if needing to extrapolate backwards in time
    count = np.count_nonzero(tdec < fd['time'].min())
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates before model
        ind, = np.nonzero(tdec < fd['time'].min())
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, NN))
        #-- read the first year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate variable to coordinates
        DATA = np.zeros((count, N))
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            #-- time at k
            TIME[k] = fd['time'][k]
            #-- spatially extrapolate variable
            tmp = gs['cumulative'][k, i, j]
            DATA[:, k] = np.sum(w * tmp[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap[v] = regress_model(TIME,
                                      DATA[n, :],
                                      tdec[v],
                                      ORDER=2,
                                      CYCLES=[0.25, 0.5, 1.0],
                                      RELATIVE=TIME[0])
        #-- set interpolation type (2: extrapolated backwards in time)
        extrap.interpolation[ind] = 2

    #-- check if needing to extrapolate forward in time
    count = np.count_nonzero(tdec >= fd['time'].max())
    if (count > 0) and EXTRAPOLATE:
        #-- indices of dates after racmo model
        ind, = np.nonzero(tdec >= fd['time'].max())
        #-- query the search tree to find the NN closest points
        xy2 = np.concatenate((X[ind, None], Y[ind, None]), axis=1)
        dist, indices = tree.query(xy2, k=NN, return_distance=True)
        #-- normalized weights if POWER > 0 (typically between 1 and 3)
        #-- in the inverse distance weighting
        power_inverse_distance = dist**(-POWER)
        s = np.sum(power_inverse_distance, axis=1)
        w = power_inverse_distance / np.broadcast_to(s[:, None], (count, NN))
        #-- read the last year of data to create regression model
        N = 365
        #-- calculate a regression model for calculating values
        #-- spatially interpolate variable to coordinates
        DATA = np.zeros((count, N))
        TIME = np.zeros((N))
        #-- create interpolated time series for calculating regression model
        for k in range(N):
            kk = nt - N + k
            #-- time at kk
            TIME[k] = fd['time'][kk]
            #-- spatially extrapolate variable
            tmp = gs['cumulative'][kk, i, j]
            DATA[:, k] = np.sum(w * tmp[indices], axis=1)
        #-- calculate regression model
        for n, v in enumerate(ind):
            extrap[v] = regress_model(TIME,
                                      DATA[n, :],
                                      tdec[v],
                                      ORDER=2,
                                      CYCLES=[0.25, 0.5, 1.0],
                                      RELATIVE=TIME[-1])
        #-- set interpolation type (3: extrapolated forward in time)
        extrap.interpolation[ind] = 3

    #-- complete mask if any invalid in data
    invalid, = np.nonzero((extrap.data == extrap.fill_value)
                          | np.isnan(extrap.data))
    extrap.mask[invalid] = True
    #-- replace fill value if specified
    if FILL_VALUE:
        extrap.fill_value = FILL_VALUE
        extrap.data[extrap.mask] = extrap.fill_value

    #-- return the interpolated values
    return extrap
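#-- note: the spatial step of extrapolate_racmo_daily is an inverse-distance
#-- weighting of the NN nearest model cells. The following self-contained
#-- sketch shows only that weighting, with illustrative array names; it is
#-- not part of the function above.
import numpy as np
from sklearn.neighbors import BallTree

def idw_sketch(xy_model, values, xy_query, NN=10, POWER=2.0):
    #-- build the search tree from the model coordinates (n_model x 2)
    tree = BallTree(xy_model)
    #-- distances and indices of the NN nearest neighbors (n_query x NN)
    dist, indices = tree.query(xy_query, k=NN, return_distance=True)
    #-- inverse-distance weights, normalized to sum to one per query point
    #-- (a query point coincident with a model point would need special care)
    w = dist**(-POWER)
    w /= np.sum(w, axis=1, keepdims=True)
    #-- weighted average of the neighboring model values
    return np.sum(w * values[indices], axis=1)

#-- purely illustrative usage with random points
rng = np.random.default_rng(0)
xy_model = rng.uniform(0.0, 1.0, (500, 2))
values = np.hypot(xy_model[:, 0], xy_model[:, 1])
xy_query = rng.uniform(0.0, 1.0, (5, 2))
print(idw_sketch(xy_model, values, xy_query))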