Example 1
import numpy as np
from xhistogram.xarray import histogram


def hist_sum(da_escvel, levels, start, end) -> np.ndarray:
    """From a DataArray of escape velocities and a levels array, create an
    array of summed histogram bins over a series of decades (end exclusive)."""
    bins = np.array(levels)
    hist_sum = histogram(da_escvel[start], bins=[bins]).data
    for i in range(start + 1, end):
        h = histogram(da_escvel[i], bins=[bins]).data
        hist_sum += h
    return hist_sum
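
A minimal usage sketch; the synthetic escape-velocity array and levels below are illustrative assumptions, not from the original source:

import xarray as xr

# Five decades of a 50x50 escape-velocity field (made-up values)
da_escvel = xr.DataArray(np.random.rand(5, 50, 50),
                         dims=['decade', 'y', 'x'],
                         name='escvel')
levels = np.linspace(0.0, 1.0, 11)      # 11 edges -> 10 bins
total = hist_sum(da_escvel, levels, start=0, end=5)
print(total.shape)                      # (10,)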
Example 2
import numpy as np
from xhistogram.xarray import histogram


def vertical_rebin(data, bin_data, bins, dz, vert_dim="st_ocean"):
    """Integrate data * dz within bins of bin_data along the vertical dimension."""
    nanmask = np.isnan(data)
    # Should we also check the bin data for nans?
    full_sum = histogram(
        bin_data.where(~nanmask),
        bins=[bins],
        weights=(data * dz).where(~nanmask),
        dim=[vert_dim],
    )
    return full_sum
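
A minimal sketch of how this might be called, with made-up profile data (the dimension names, sizes, and values are assumptions):

import numpy as np
import xarray as xr

data = xr.DataArray(np.random.rand(10, 4), dims=['st_ocean', 'pt'], name='temp')
bin_data = xr.DataArray(20 + 5 * np.random.rand(10, 4),
                        dims=['st_ocean', 'pt'], name='sigma')
dz = xr.DataArray(np.full(10, 10.0), dims=['st_ocean'])   # layer thickness [m]
bins = np.linspace(20, 25, 11)
rebinned = vertical_rebin(data, bin_data, bins, dz)       # dims: ('pt', 'sigma_bin')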
Example 3
import numpy as np
from datetime import datetime
from xhistogram.xarray import histogram


def compute_values(ds, geometry, dataset, variable, years, depth):
    if dataset == 'historic':
        start_date = years[0]
        end_date = years[1]
    else:
        start_date = np.datetime64(
            datetime.strptime(f'{years[0]}-12-31', "%Y-%m-%d"))
        end_date = np.datetime64(
            datetime.strptime(f'{years[1]}-12-31', "%Y-%m-%d"))

    # geometry.bounds returns (xmin, ymin, xmax, ymax); ymin/ymax are swapped
    # here, presumably because the latitude coordinate runs north to south
    xmin, ymax, xmax, ymin = geometry.bounds
    ds_index = ds.where(ds['mask'].isin(0.0)).sel(depth=depth,
                                                  lon=slice(xmin, xmax),
                                                  lat=slice(ymin, ymax),
                                                  time=slice(
                                                      start_date, end_date))

    # Get difference between two dates
    diff = ds_index.loc[dict(time=end_date)] - ds_index.loc[dict(
        time=start_date)]

    # Get counts and bins for the histogram
    if (dataset == 'experimental') and (variable == 'stocks'):
        diff = diff[variable] / 10.
    else:
        diff = diff[variable]

    # binds and ranges are external helpers returning the bin count and value range
    nBinds = binds(dataset, variable)
    bindsRange = ranges(dataset, variable)

    bins = np.linspace(bindsRange[0], bindsRange[1], nBinds + 1)
    h = histogram(diff, bins=[bins], dim=['lat', 'lon'], block_size=1)

    counts = h.values
    mean_diff = diff.mean(skipna=True).values
    if (dataset == 'experimental') and (variable == 'stocks'):
        mean_values = ds_index[variable].mean(['lon', 'lat']).values / 10.
    else:
        mean_values = ds_index[variable].mean(['lon', 'lat']).values

    if dataset == 'historic':
        mean_years = ds.coords.get('time').values
    else:
        mean_years = [
            int(str(x).split('-')[0])
            for x in ds_index.coords.get('time').values
        ]

    # Replace NaNs with None
    if np.isnan(mean_diff):
        mean_diff = None

    mean_values = [None if np.isnan(x) else x for x in mean_values]

    return counts, bins, mean_diff, mean_years, mean_values
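
The function depends on the external binds and ranges helpers and a particular dataset schema, so a full call is not reproducible here; a self-contained sketch of the binning step alone, with synthetic values:

import numpy as np
import xarray as xr
from xhistogram.xarray import histogram

diff = xr.DataArray(np.random.randn(6, 8), dims=['lat', 'lon'], name='diff')
bins = np.linspace(-3.0, 3.0, 21)                  # 21 edges -> 20 bins
h = histogram(diff, bins=[bins], dim=['lat', 'lon'])
counts = h.values                                  # shape (20,)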
Example 4
import numpy as np
import xarray as xr
from xhistogram.xarray import histogram


def hist_mean(da_escvel: xr.DataArray,
              levels: np.ndarray,
              start: int,
              end: int,
              name: str = 'Esc Vel Distribution') -> xr.DataArray:
    """From a DataArray of escape velocities, a levels array, start and
    end indices (end exclusive), and a name, create a DataArray for the mean
    of histogram bins over a series of decades."""
    bins = np.array(levels)
    hist_sum = histogram(da_escvel[start], bins=[bins]).data
    for i in range(start + 1, end):
        h = histogram(da_escvel[i], bins=[bins]).data
        hist_sum += h
    hist_mean = hist_sum / hist_sum.sum()
    hist_mean = xr.DataArray(hist_mean,
                             dims=['bin_edges'],
                             coords=[bins[:-1]]).rename(name)

    return hist_mean
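
Called the same way as hist_sum above, but the result is a normalized DataArray; a quick sketch with assumed synthetic data:

import numpy as np
import xarray as xr

da = xr.DataArray(np.random.rand(3, 50, 50),
                  dims=['decade', 'y', 'x'], name='escvel')
dist = hist_mean(da, np.linspace(0.0, 1.0, 11), start=0, end=3)
print(float(dist.sum()))   # ~1.0: bin fractions form a distribution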
Example 5
def calc_E(l, dl, l_i_vals, c=None, weight=None, dims=['xh', 'yh', 'zl']):
    '''Transport of mass or tracer across contours of [l], due to processes [dl].
    l : xr.DataArray;
        Intensive variable (e.g. temperature) that defines the layers across which
        transport will be determined.
    dl : xr.DataArray;
        Tendency of l due to (individual or total sum of) diffusive processes,
        e.g. heat tendencies.
        Multiple processes are included as DataArrays inside a Dataset.
    l_i_vals : ndarray or xr.DataArray;
        Interface values of l; transport will be across midpoints of these interfaces.
    c : None or scalar or xr.DataArray;
        If included, evaluates transport of a tracer rather than volume.
        A scalar could be density, to get mass rather than volume transport.
    weight : xr.DataArray;
        Distribution of weights by which to multiply [dl] to remove the spatial
        dimension of the units. E.g. heat flux in W m-2 should be multiplied by
        the area of the grid cell to recover total heating.
    dims : list of string;
        Dimensions along which to perform the histogram.'''
    
    import xarray as xr
    import numpy as np
    from xhistogram.xarray import histogram
    
    # Get the spacing of contours of l
    delta_l_vals = np.diff(l_i_vals)
    # xhistogram has some curiosities around nan-values, so exclude them
    nanmask = np.isnan(l)
    
    # Specify what should be integrated
    # (this is rather clunky with if statements, but is meant to avoid loading
    # unnecessary variables, such as dummy [c]'s or [weight]'s)
    if c is None:
        if weight is None:
            weights = dl
        else:
            weights = dl*weight
    else:
        if weight is None:
            weights = c*dl
        else:
            weights = c*dl*weight
            
    # Integrate [weights] within layers of [l] and divide by [delta_l]
    E = histogram(l.where(~nanmask),
                  bins=[l_i_vals],
                  weights=weights.where(~nanmask),
                  dim=dims,
                  block_size=1)/delta_l_vals
    
    return E
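
A minimal sketch with synthetic model fields; the grid dimensions, variable names, and magnitudes below are assumptions:

import numpy as np
import xarray as xr

l = xr.DataArray(20 * np.random.rand(5, 4, 4),
                 dims=['zl', 'yh', 'xh'], name='thetao')      # temperature [degC]
dl = xr.DataArray(1e-6 * np.random.randn(5, 4, 4),
                  dims=['zl', 'yh', 'xh'])                    # diffusive tendency
area = xr.DataArray(np.full((4, 4), 1e8), dims=['yh', 'xh'])  # cell area [m2]
l_i_vals = np.arange(0.0, 22.0, 2.0)                          # 11 interfaces -> 10 layers
E = calc_E(l, dl, l_i_vals, weight=area)                      # dims: ('thetao_bin',)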
Example 6
import numpy as np
import xarray as xr
from xhistogram.xarray import histogram


def calc_P(p, l, l_i_vals, area, greaterthan=True):
    '''Integration of quantity [p] across volume with [l] greater than *mid-point* 
    values between layer interfaces [l_i_vals] (i.e. the center layer values).
    This is done so that the output of calc_P aligns with that of calc_G and calc_E,
    when given the same l_i_vals.
    
    *** IMPORTANT *** 
    Limits of l_i_vals should be such that their mid-points span the full range of l 
    *****************
    
    p : quantity to be integrated (e.g. tendencies of independent tracer [c])
    l : intensive variable, to define volume boundary
    l_i_vals : interface values of l 
        (integral is done for l[i] >= 0.5*(l_i_vals[i]+l_i_vals[i+1]))
    area : 2d distribution of horizontal area (dx*dy)
    greaterthan : boolean, True if the integral should be over contours 
        greater than each layer interface. False for less than.'''

    # Get the mid-points of the layers (as defined by their interfaces)
    l_l_vals = 0.5 * (l_i_vals[:-1] + l_i_vals[1:])
    # xhistogram has some curiosities around nan-values, so exclude them
    nanmask = np.isnan(l)
    # Integrate p between each layer mid-point
    P_l = histogram(l.where(~nanmask),
                    bins=[l_l_vals],
                    weights=(p * area).where(~nanmask),
                    dim=['xh', 'yh', 'zl'],
                    block_size=1)
    # Cumulatively sum the layers, with zeros concatenated at the start, and the
    # coordinates reassigned to align with the layer mid-points. For the
    # greater-than case, the cumulative sum is subtracted from the total,
    # reversing the summation so that it runs from largest to smallest [l_l].
    P_l_cumsum = xr.concat([
        xr.zeros_like(P_l.isel({l.name + '_bin': 0})),
        P_l.cumsum(l.name + '_bin')
    ],
                           dim=l.name + '_bin')
    if greaterthan:
        P = (P_l.sum(l.name + '_bin') - P_l_cumsum).assign_coords(
            {l.name + '_bin': l_l_vals})
    else:
        P = P_l_cumsum.assign_coords({l.name + '_bin': l_l_vals})

    return P
Example 7
import numpy as np
import xarray as xr
from xhistogram.xarray import histogram


def calc_volumetric_cumsum(l, p, l_l_vals, weight=None, dims=['xh', 'yh', 'zl'],
                           greaterthan=True):
    '''Integration of quantity [p] across volume with [l] greater than layer defined
    by contours l_l_vals.
    
    *** IMPORTANT *** 
    Limits of l_l_vals must span the full range of l.
    *****************
    
    l : xr.DataArray;
        Intensive variable, to define volume boundary
    p : xr.DataArray;
        Quantity to be integrated (e.g. thickness)
    l_l_vals : ndarray or xr.DataArray
        Values of the contours for which the integration will be calculated
        *** This is currently set up such that the layer values are increasing ***
        *** I expect it can be generalized, but have not explored this yet     ***
    weight : xr.DataArray;
        Distribution of weights by which to multiply [p] to remove the spatial
        dimension of the units. E.g. thickness in units 'm' should be multiplied
        by the area of the grid cell to recover total volume.
    greaterthan : boolean;
        True if the integral should be over contours 
        greater than each contour. False for less than.'''
    
    # xhistogram has some curiosities around nan-values, so exclude them
    nanmask = np.isnan(l)
    
    # Specify what should be integrated
    if weight is None:
        weights = p
    else:
        weights = p*weight
        
    # Integrate p between each contour layer
    P_l = histogram(l.where(~nanmask),
                    bins=[l_l_vals],
                    weights=weights.where(~nanmask),
                    dim=dims,
                    block_size=1)
    # Cumulatively sum the layers (with zeros concatenated at the start),
    # i.e. the cumulative sum of integrated [p] from smallest to largest [l_l]
    P_l_cumsum = xr.concat([
        xr.zeros_like(P_l.isel({l.name + '_bin': 0})),
        P_l.cumsum(l.name + '_bin')
    ], dim=l.name + '_bin')
    if greaterthan:
        # Subtract the cumulative sum from the total to reverse the order of the
        # summation (largest to smallest [l_l]), and assign coordinates to match
        # the contours (rather than the interfaces)
        P = (P_l.sum(l.name + '_bin') - P_l_cumsum).assign_coords(
            {l.name + '_bin': l_l_vals})
    else:
        P = P_l_cumsum.assign_coords({l.name + '_bin': l_l_vals})
    return P
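
A minimal sketch of the volumetric cumulative sum with made-up fields (names and values assumed):

import numpy as np
import xarray as xr

l = xr.DataArray(20 * np.random.rand(5, 4, 4),
                 dims=['zl', 'yh', 'xh'], name='thetao')
p = xr.DataArray(np.full((5, 4, 4), 10.0),
                 dims=['zl', 'yh', 'xh'])                     # thickness [m]
area = xr.DataArray(np.full((4, 4), 1e8), dims=['yh', 'xh'])  # cell area [m2]
l_l_vals = np.arange(0.0, 21.0, 1.0)                          # spans the range of l
# Volume of water warmer than each contour
V = calc_volumetric_cumsum(l, p, l_l_vals, weight=area)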
Example 8
import numpy as np
from xhistogram.xarray import histogram


def calc_G(l, dl, l_i_vals, area, plot=False):
    '''Mass transport across contours of [l].
    l : intensive variable, to evaluate transport across
    dl : summed tendency of (vertically integrated) l due to diffusive processes;
        e.g. heat tendency if l is temperature
    l_i_vals : interface values of l
    area : 2d distribution of horizontal area (dx*dy)'''

    # Get the spacing of contours of l
    delta_l_vals = np.diff(l_i_vals)
    # xhistogram has some curiosities around nan-values, so exclude them
    nanmask = np.isnan(l)
    # Integrate [dl] within layers of [l] and divide by [delta_l]
    G = histogram(l.where(~nanmask),
                  bins=[l_i_vals],
                  weights=(dl * area).where(~nanmask),
                  dim=['xh', 'yh', 'zl'],
                  block_size=1) / delta_l_vals

    if plot:
        G.plot()
    return G
Example 9
import numpy as np
import xarray as xr
from xhistogram.xarray import histogram


def total_rebin_layerintegral(ds,
                              bin_data,
                              bins,
                              dim,
                              area,
                              block_size='auto',
                              verbose=False):
    """Rebin dataset [ds] in multiple dimensions,
    and integrate terms within new layers.
    """
    ds = ds.copy()
    ds_rebinned = xr.Dataset()
    for var in ds.data_vars:
        if ds[var].dtype == 'float':
            if verbose:
                print(var)
            nanmask = np.isnan(ds[var])
            ds_rebinned[var] = histogram(bin_data.where(~nanmask),
                                         bins=[bins],
                                         dim=dim,
                                         weights=(ds[var] *
                                                  area).where(~nanmask),
                                         block_size=block_size)
    return ds_rebinned
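
A sketch of rebinning a small synthetic Dataset into density layers; all names and numbers below are illustrative assumptions:

import numpy as np
import xarray as xr

ds = xr.Dataset({'heat': (('zl', 'yh', 'xh'), np.random.rand(5, 4, 4))})
sigma = xr.DataArray(25 + 5 * np.random.rand(5, 4, 4),
                     dims=['zl', 'yh', 'xh'], name='sigma')
area = xr.DataArray(np.full((4, 4), 1e8), dims=['yh', 'xh'])
bins = np.linspace(25, 30, 11)
out = total_rebin_layerintegral(ds, sigma, bins, dim=['zl'], area=area)
print(out['heat'].dims)    # ('yh', 'xh', 'sigma_bin')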
Example 10
import numpy as np
import xarray as xr
from xhistogram.xarray import histogram
# Note: the 'busecke' branch below relies on external regridding helpers
# (vc.linear_interpolation_regrid and conservative_remap) imported elsewhere.


def calc_E_old(ds,
               l,
               l_i_vals,
               dl,
               c,
               area,
               z='depth',
               xdim='xh',
               ydim='yh',
               zldim='zl',
               zidim='zi',
               binning=None):
    r'''Evaluation of the transport of tracer [c] across contours of tracer [l].
    l and c should be given as strings and both be contained in Dataset ds.
    dl corresponds to the diffusive time tendency of the tracer *content* of l,
    i.e. the tendency of rho*l*h where h is layer thickness and rho is in situ
    density, thus in units of [kg m^-3]*[l tracer unit]*[m]*[s^-1].
    Then
        E(l') = \frac{1}{\Delta l}\iint_{l=l'} c*dl \, dA
    This uses the fact that the tracer content tendency is equal to the
    rho * tracer tendency integrated within a layer (which is the formulation
    for WMT in Groeskamp et al., 2019).'''

    E = xr.Dataset()

    # Calculate tracer at layer centres from interface values
    l_l_vals = 0.5 * (l_i_vals[:-1] + l_i_vals[1:])
    # Calculate delta_tracer for each layer
    delta_l_vals = np.diff(l_i_vals)
    # Place in DataArrays
    l_i = xr.DataArray(l_i_vals, dims=['l_i'], coords={'l_i': l_i_vals})
    l_l = xr.DataArray(l_l_vals, dims=['l_l'], coords={'l_l': l_l_vals})
    delta_l = xr.DataArray(delta_l_vals,
                           dims=['l_l'],
                           coords={'l_l': l_l_vals})

    # Calculate cdl
    cdl = ds[c] * dl

    # Bin into layers of l, and sum up across space
    if binning is None:
        work = (cdl * area).sum(dim=[xdim, ydim])
        # assumes the vertical dim already corresponds to l-layers
        work = work.rename({zldim: 'l_l'})
    if binning == 'xhistogram':
        nanmask = np.isnan(cdl)
        work = histogram(ds[l].where(~nanmask),
                         bins=[l_i.values],
                         dim=[xdim, ydim, zldim, 'time'],
                         weights=(cdl * area).where(~nanmask),
                         block_size=None)
        work = work.rename({l + '_bin': 'l_l'})
    if binning == 'busecke':
        l_depth_i = vc.linear_interpolation_regrid(ds[z + '_l'],
                                                   ds[l],
                                                   l_i,
                                                   z_bounds=ds[z + '_i'],
                                                   target_value_dim='l_i',
                                                   z_bounds_dim=zidim,
                                                   z_dim=zldim)
        cdl_remapped = conservative_remap(cdl,
                                          z_bnds_source=ds[z + '_i'],
                                          z_bnds_target=l_depth_i,
                                          z_dim=zldim,
                                          z_bnd_dim=zidim,
                                          z_bnd_dim_target='regridded',
                                          mask=True)
        work = (cdl_remapped * area).sum(dim=[xdim, ydim])
        work = work.rename({'remapped': 'l_l'})
        E['l_depth_i'] = l_depth_i

    E['E'] = work / delta_l
    E['dE'] = E['E'].interp(
        l_l=l_i, method='linear').diff('l_i').assign_coords(
            l_i=l_l.values).drop('l_l').rename({'l_i': 'l_l'})

    return E
Example 11
    dsdx_slice = dsdx.isel(
        eta_rho=etaslice, xi_rho=xislice,
        s_rho=-1).sel(ocean_time=slice(str(y) + '-06-15',
                                       str(y) + '-07-25'))
    dsdy_slice = dsdy.isel(
        eta_rho=etaslice, xi_rho=xislice,
        s_rho=-1).sel(ocean_time=slice(str(y) + '-06-15',
                                       str(y) + '-07-25'))

    sgradbins = np.linspace(-0.001, 0.001, 100)

    dsdx_slice.name = 'dsdx'
    dsdy_slice.name = 'dsdy'

    # We need to remove the grid attributes from the variables; otherwise
    # writing to netCDF fails with an invalid-character error.
    dsdx_slice.attrs = {}
    dsdy_slice.attrs = {}

    print('Computing histogram')
    dsdx_hist = histogram(dsdx_slice, bins=[sgradbins], density=True)
    dsdy_hist = histogram(dsdy_slice, bins=[sgradbins], density=True)

    dsdx_hist.attrs = {}
    dsdy_hist.attrs = {}

    print('Saving histogram')
    dsdx_hist.to_netcdf(
        '/home/dylan/Variability/histograms/dsdx/histograms_dsdx_%i.nc' % y)
    dsdy_hist.to_netcdf(
        '/home/dylan/Variability/histograms/dsdy/histograms_dsdy_%i.nc' % y)
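
This fragment relies on dsdx/dsdy defined earlier in the script; a self-contained sketch of the density-normalized histogram call it performs, with synthetic gradients assumed:

import numpy as np
import xarray as xr
from xhistogram.xarray import histogram

dsdx = xr.DataArray(1e-4 * np.random.randn(1000),
                    dims=['pt'], name='dsdx')
sgradbins = np.linspace(-0.001, 0.001, 100)
pdf = histogram(dsdx, bins=[sgradbins], density=True)
# with density=True the histogram integrates to ~1 over the bins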
Example 12
import os
import numpy as np
import xarray as xr
import xgcm
import pop_tools
from xhistogram.xarray import histogram
# xr_DZ_xgcm and path_prace are project-specific helpers/constants defined elsewhere


def calculate_AMOC_sigma_z(domain, ds, fn=None):
    """ calculate the AMOC in depth and density space """
    assert domain in ['ocn', 'ocn_low']
    for q in ['PD', 'VVEL', 'DXT', 'DYT', 'DXU', 'DYU', 'REGION_MASK']:
        assert q in ds

    (grid, ds_) = pop_tools.to_xgcm_grid_dataset(ds)
    ds_['DZU'] = xr_DZ_xgcm(domain=domain, grid='U')

    metrics = {
        ('X'): ['DXT', 'DXU'],  # X distances
        ('Y'): ['DYT', 'DYU'],  # Y distances
        ('Z'): ['DZU'],  # Z distances
    }
    coords = {
        'X': {
            'center': 'nlon_t',
            'right': 'nlon_u'
        },
        'Y': {
            'center': 'nlat_t',
            'right': 'nlat_u'
        },
        'Z': {
            'center': 'z_t',
            'left': 'z_w_top',
            'right': 'z_w_bot'
        }
    }
    grid = xgcm.Grid(ds_, metrics=metrics, coords=coords)

    print('merged annual datasets do not convert to U/T-lat/lons')
    if 'nlat' in ds_.VVEL.dims:
        rn = {'nlat': 'nlat_u', 'nlon': 'nlon_u'}
        ac = {'nlat_u': ds_.nlat_u, 'nlon_u': ds_.nlon_u}
        ds_['VVEL'] = ds_.VVEL.rename(rn).assign_coords(ac)
    if 'nlat' in ds_.PD.dims:
        rn = {'nlat': 'nlat_t', 'nlon': 'nlon_t'}
        ac = {'nlat_t': ds_.nlat_t, 'nlon_t': ds_.nlon_t}
        ds_['PD'] = ds_.PD.rename(rn).assign_coords(ac)

    print('interpolating density to UU point')
    ds_['PD'] = grid.interp(grid.interp(ds_['PD'], 'X'), 'Y')

    print('interpolating REGION_MASK to UU point')
    fn_MASK = f'{path_prace}/MOC/AMOC_MASK_uu_{domain}.nc'
    if os.path.exists(fn_MASK):
        AMOC_MASK_uu = xr.open_dataarray(fn_MASK)
    else:
        MASK_uu = grid.interp(grid.interp(ds_.REGION_MASK, 'Y'), 'X')
        AMOC_MASK_uu = xr.DataArray(np.in1d(
            MASK_uu, [-12, 6, 7, 8, 9, 11, 12]).reshape(MASK_uu.shape),
                                    dims=MASK_uu.dims,
                                    coords=MASK_uu.coords)
        AMOC_MASK_uu.to_netcdf(fn_MASK)

    print('AMOC(y,z);  [cm^3/s] -> [Sv]')
    AMOC_yz = (grid.integrate(
        grid.cumint(ds_.VVEL.where(AMOC_MASK_uu), 'Z', boundary='fill'), 'X') /
               1e12)
    #     AMOC_yz = (ds_.VVEL*ds_.DZU*ds_.DXU).where(AMOC_MASK_uu).sum('nlon_u').cumsum('z_t')/1e12
    AMOC_yz = AMOC_yz.rename({'z_w_top': 'z_t'}).assign_coords({'z_t': ds.z_t})
    AMOC_yz.name = 'AMOC(y,z)'

    print('AMOC(sigma_0,z);  [cm^3/s] -> [Sv]')
    if int(ds_.PD.isel(z_t=0).mean().values) == 0:
        PD, PDbins = ds_.PD * 1000, np.arange(-10, 7, .05)
    if int(ds_.PD.isel(z_t=0).mean().values) == 1:
        PD, PDbins = (ds_.PD - 1) * 1000, np.arange(5, 33, .05)

    print('histogram')
    weights = ds_.VVEL.where(AMOC_MASK_uu) * ds_.DZU * ds_.DXU / 1e12
    #     ds_.PD.isel(z_t=0).plot()
    AMOC_sz = histogram(PD, bins=[PDbins], dim=['z_t'],
                        weights=weights).sum('nlon_u',
                                             skipna=True).cumsum('PD_bin').T
    AMOC_sz.name = 'AMOC(y,PD)'

    # output to file
    if fn is not None: xr.merge([AMOC_yz, AMOC_sz]).to_netcdf(fn)
    return AMOC_yz, AMOC_sz
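
The key xhistogram step above bins meridional transport by density instead of depth; a stripped-down sketch of that step with synthetic columns (dimension names and values assumed):

import numpy as np
import xarray as xr
from xhistogram.xarray import histogram

PD = xr.DataArray(26 + 2 * np.random.rand(10, 20),
                  dims=['z_t', 'nlat_u'], name='PD')        # density
V = xr.DataArray(np.random.randn(10, 20),
                 dims=['z_t', 'nlat_u'])                    # transport weights
PDbins = np.arange(25.0, 28.0, 0.05)
AMOC_s = histogram(PD, bins=[PDbins], dim=['z_t'],
                   weights=V).cumsum('PD_bin')              # (nlat_u, PD_bin)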
Example 13
import xarray as xr
from xhistogram import xarray as xh


def x_hist(data, bins):
    data_arr = xr.DataArray(data, dims=["a", "b"], name="aaa")
    res = xh.histogram(data_arr, bins=bins, dim="b")
    return res.data
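
A quick sketch of calling the wrapper (assuming an xhistogram version that, like the wrapper, accepts a bare bins array and a single dim name):

import numpy as np

res = x_hist(np.random.rand(3, 100), bins=np.linspace(0.0, 1.0, 11))
print(res.shape)   # (3, 10): one histogram per element of dim "a"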
Example 14
    xislice = slice(270, 405)

    #Compute vertical relative vorticity
    rv = xroms.relative_vorticity(ds.u, ds.v, ds.u.attrs['grid'])
    #Interpolate to the rho points.
    rv = grid.interp(rv, 'Z')
    rv = rv.isel(eta_v=etaslice, xi_u=xislice, s_rho=-1)

    # Note: we're going to need to select the second-to-last vertical value since it's on the w-points

    fx = grid.interp(ds.f, 'X', boundary='extend')
    fxy = grid.interp(fx, 'Y', boundary='extend')
    f = fxy.isel(eta_v=etaslice, xi_u=xislice)

    RVn = rv / f

    zetabins = np.linspace(-2.5, 2.5, 100)

    RVn.name = 'relative_vorticity_n'
    RVn_slice = RVn.sel(ocean_time=slice(str(y) + '-06-15', str(y) + '-07-25'))
    # We need to remove the grid attributes from the variable; otherwise
    # writing to netCDF fails with an invalid-character error.
    RVn_slice.attrs = {}

    print('Computing histogram')
    zetaf_hist = histogram(RVn_slice, bins=[zetabins], density=True)

    print('Saving histogram')
    zetaf_hist.to_netcdf(
        '/home/dylan/Variability/histograms/rho/histograms_zetaf_%i.nc' % y)
Example 15
    q_u_total = salt_u * ds.u * dA_u
    q_u_box = q_u_total.isel(xi_u=xislice, eta_rho=etaslice)
    salt_u_box = salt_u.isel(xi_u=xislice, eta_rho=etaslice)

    q_u_right = q_u_box.isel(xi_u=-1)
    q_u_right.name = 'q_right'
    salt_u_right = salt_u_box.isel(xi_u=-1)
    salt_u_right.name = 'salt_right'

    q_u_left = q_u_box.isel(xi_u=0)
    q_u_left.name = 'q_left'
    salt_u_left = salt_u_box.isel(xi_u=0)
    salt_u_left.name = 'salt_left'

    qlefthist = histogram(salt_u_left,
                          bins=[salt_bins],
                          weights=q_u_left,
                          dim=['s_rho', 'eta_rho'])
    qleft = qlefthist.sum(axis=0)
    Qleft = qleft * dsalt
    Qleftin = qlefthist.where(qleft > 0).sum(axis=1) * dsalt
    Qleftout = qlefthist.where(qleft < 0).sum(axis=1) * dsalt

    qleft.to_netcdf('qleft_%02d.nc' % m)
    print('qleft_%02d.nc DONE!' % m, flush=True)
    Qleft.to_netcdf('Qleft_%02d.nc' % m)
    Qleftin.to_netcdf('Qleftin_%02d.nc' % m)
    Qleftout.to_netcdf('Qleftout_%02d.nc' % m)
    print('Qleftout_%02d.nc' % m, flush=True)

    qrighthist = histogram(salt_u_right,
                           bins=[salt_bins],
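
The snippet is cut off above; a self-contained sketch of the flux-per-salinity-class binning it performs, with the section dimensions and values assumed:

import numpy as np
import xarray as xr
from xhistogram.xarray import histogram

salt = xr.DataArray(30 + 5 * np.random.rand(20, 30),
                    dims=['s_rho', 'eta_rho'], name='salt_right')
q = xr.DataArray(np.random.randn(20, 30),
                 dims=['s_rho', 'eta_rho'])                 # salt transport
salt_bins = np.linspace(30.0, 35.0, 51)
qhist = histogram(salt, bins=[salt_bins], weights=q,
                  dim=['s_rho', 'eta_rho'])                 # flux per salt class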
Example 16
                .dropna('npart')
    trajY = cm.interp_to_coords(trajT,
                                ctr.loc[rng], Zeq.loc[rng],
                                interpDim='Z') \
              .transpose('npart','time') \
              .reset_coords(drop=True)

    diffY = trajY.diff('time')**2 / 300

    # remove NaNs by aligning positions against diffY (derived from trajY)
    diffY, xpos = xr.align(diffY, xpos)
    diffY, zpos = xr.align(diffY, zpos)

    hsum = histogram(zpos,
                     xpos,
                     dim=['npart'],
                     weights=np.abs(diffY),
                     bins=[binZ, binX])
    hcnt = histogram(zpos, xpos, dim=['npart'], bins=[binZ, binX])

    histSum.append(hsum)
    histCnt.append(hcnt)

# hstSum = xr.merge(histSum)
# hstCnt = xr.merge(histCnt)

#%% write files
for data in histSum:
    for tim in data:
        tt = int(tim.time.values / 300)
        print(tt)
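
The loop above is truncated; the distinctive call in this example is the two-variable histogram. A self-contained sketch of a 2-D weighted histogram over particles, with all names and bins assumed:

import numpy as np
import xarray as xr
from xhistogram.xarray import histogram

zpos = xr.DataArray(-100 * np.random.rand(500), dims=['npart'], name='zpos')
xpos = xr.DataArray(360 * np.random.rand(500), dims=['npart'], name='xpos')
w = xr.DataArray(np.random.rand(500), dims=['npart'])
binZ = np.linspace(-100.0, 0.0, 21)
binX = np.linspace(0.0, 360.0, 37)
hsum = histogram(zpos, xpos, dim=['npart'], weights=w, bins=[binZ, binX])
hcnt = histogram(zpos, xpos, dim=['npart'], bins=[binZ, binX])
# hsum / hcnt approximates the mean weight per (zpos_bin, xpos_bin) cell,
# where hcnt > 0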