Code example #1
File: hgt_example.py  Project: ajdawson/eofs

import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr

from eofs.xarray import Eof
from eofs.examples import example_data_path


# Read geopotential height data using the xarray module. The file contains
# December-February averages of geopotential height at 500 hPa for the
# European/Atlantic domain (80W-40E, 20-90N).
filename = example_data_path('hgt_djf.nc')
z_djf = xr.open_dataset(filename)['z']

# Compute anomalies by removing the time-mean.
z_djf = z_djf - z_djf.mean(dim='time')

# Create an EOF solver to do the EOF analysis. Square-root of cosine of
# latitude weights are applied before the computation of EOFs.
coslat = np.cos(np.deg2rad(z_djf.coords['latitude'].values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(z_djf, weights=wgts)

# Retrieve the leading EOF, expressed as the covariance between the leading PC
# time series and the input geopotential height anomalies at each grid point.
eof1 = solver.eofsAsCovariance(neofs=1)

# Plot the leading EOF expressed as covariance in the European/Atlantic domain.
clevs = np.linspace(-75, 75, 11)
proj = ccrs.Orthographic(central_longitude=-20, central_latitude=60)
ax = plt.axes(projection=proj)
ax.coastlines()
ax.set_global()
eof1[0, 0].plot.contourf(ax=ax, levels=clevs, cmap=plt.cm.RdBu_r,
                         transform=ccrs.PlateCarree(), add_colorbar=False)
ax.set_title('EOF1 expressed as covariance', fontsize=16)
plt.show()
Code example #2
File: sst_example.py  Project: ajdawson/eofs
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr

from eofs.xarray import Eof
from eofs.examples import example_data_path


# Read SST anomalies using the xarray module. The file contains November-March
# averages of SST anomaly in the central and northern Pacific.
filename = example_data_path('sst_ndjfm_anom.nc')
sst = xr.open_dataset(filename)['sst']

# Create an EOF solver to do the EOF analysis. Square-root of cosine of
# latitude weights are applied before the computation of EOFs.
coslat = np.cos(np.deg2rad(sst.coords['latitude'].values))
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(sst, weights=wgts)

# Retrieve the leading EOF, expressed as the correlation between the leading
# PC time series and the input SST anomalies at each grid point, and the
# leading PC time series itself.
eof1 = solver.eofsAsCorrelation(neofs=1)
pc1 = solver.pcs(npcs=1, pcscaling=1)

# Plot the leading EOF expressed as correlation in the Pacific domain.
clevs = np.linspace(-1, 1, 11)
ax = plt.axes(projection=ccrs.PlateCarree(central_longitude=190))
fill = eof1[0].plot.contourf(ax=ax, levels=clevs, cmap=plt.cm.RdBu_r,
                             add_colorbar=False, transform=ccrs.PlateCarree())
ax.add_feature(cfeature.LAND, facecolor='w', edgecolor='k')
cb = plt.colorbar(fill, orientation='horizontal')
cb.set_label('correlation coefficient', fontsize=12)
plt.show()
Code example #3
def compute_relative_entropy(
    initialized,
    control,
    anomaly_data=False,
    neofs=None,
    curv=True,
    nlead=None,
    nmember_control=10,
):
    """
    Compute relative entropy.

    Calculates EOFs from anomalies, projects the fields onto those EOFs to
    obtain pseudo-principal components per init and lead year, and then
    calculates relative entropy based on _relative_entropy_formula.

    Args:
        initialized (xr.Dataset): anomaly ensemble data with dimensions
                                    lead, member, time and
                                    spatial [lon (x), lat(y)].
                                    DPLE or PM_ds
        control (xr.Dataset): anomaly control distribution with
                                              non-spatial dimensions:
                                              spatial [lon (x), lat(y)].
                                              - LENS: member, time
                                              - PM_control: time
        anomaly_data (bool): Input data are already anomalies. Default: False.
        neofs (int): number of EOFs to use.
                     Default: initialized.member.size.
        curv (bool): if True (curvilinear grid), disable latitude weighting
                     of the EOFs. Default: True.
        nlead (int): number of lead times to calculate.
                     Default: initialized.lead.size.
        nmember_control (int): number of members to create by bootstrapping
                               from control. Default: 10.

    Returns:
        rel_ent (xr.Dataset): relative entropy
    """
    if Eof is None:
        raise ImportError("eofs is not installed; see "
                          "https://ajdawson.github.io/eofs/latest/index.html")
    # Defaults
    if neofs is None:
        neofs = initialized.member.size
    if nlead is None:
        nlead = initialized.lead.size

    # case if you submit control with dim time and member, LENS case
    if "member" in control.dims:
        control_uninitialized = _bootstrap_dim(
            control,
            initialized.lead.size,
            dim="init",
            dim_label=list(initialized.init.values),
        )

    # case if you only submit control with dim time, PM case
    else:
        control_uninitialized = xr.concat(
            [
                _bootstrap_dim(
                    control,
                    initialized.lead.size,
                    dim="member",
                    dim_label=np.arange(nmember_control),
                ) for _ in range(initialized.init.size)
            ],
            dim="init",
        )
        control_uninitialized["init"] = initialized.init.values

    # initialized and control_uninitialized are allowed to have different
    # dims because more members are needed to sample the control
    # distribution properly
    if set(initialized.dims) != set(control_uninitialized.dims):
        warnings.warn(
            "initialized and control_uninitialized have different dims."
        )
        # print(initialized, control_uninitialized)

    # convert to xr.DataArray
    if isinstance(control_uninitialized, xr.Dataset):
        control_uninitialized = control_uninitialized.to_array().squeeze()
    if isinstance(initialized, xr.Dataset):
        initialized = initialized.to_array().squeeze()

    # detrend
    non_spatial_dims = set(control_uninitialized.dims).intersection(
        ["init", "member"])
    non_spatial_dims = list(non_spatial_dims)
    if not anomaly_data:  # if ds, control are raw values
        anom_x = initialized - control_uninitialized.mean(non_spatial_dims)
        anom_b = control_uninitialized - control_uninitialized.mean(
            non_spatial_dims)
    else:  # leave as is when already anomalies
        anom_x = initialized
        anom_b = control_uninitialized

    # prepare for EOF
    if curv:  # if curvilinear lon(x,y), lat(x,y) data inputs
        wgts = None
    else:  # assumes there is 'lat' in coords
        coslat = np.cos(np.deg2rad(anom_x.coords["lat"].values))
        wgts = np.sqrt(coslat)[..., np.newaxis]

    # Eof requires an xr.DataArray
    if isinstance(control, xr.Dataset):
        control = control.to_array().squeeze()

    if "member" in control.dims:  # LENS
        # stack member and init into time dim, make time first
        non_spatial_control_dims = list(
            set(control.dims).intersection(["time", "member"]))

        transpose_dims = list(control.dims)
        transpose_dims.remove("member")
        transpose_dims.remove("time")
        dims = tuple(["time"] + transpose_dims)

        base_to_calc_eofs = (
            control.stack(new=tuple(non_spatial_control_dims))
            .rename({"new": "time"})
            .set_index({"time": "time"})
            .transpose(*dims)
        )
    else:
        # PM_control
        base_to_calc_eofs = control

    solver = Eof(base_to_calc_eofs, weights=wgts)

    re_leadtime_list = []
    leads = initialized.lead.values[:nlead]
    inits = initialized.init.values
    # TODO: parallelize this double loop
    for init in inits:  # loop over inits
        rl, sl, dl = ([] for _ in range(3))  # lists to store results in
        for lead in leads:  # loop over lead time
            # P_b: base distribution (projectField requires a 'time' dim,
            # so 'member' is renamed below)
            pc_b = solver.projectField(
                anom_b.sel(init=init, lead=lead).drop_vars("lead").rename(
                    {"member": "time"}),
                neofs=neofs,
                eofscaling=0,
                weighted=False,
            ).rename({"time": "lead"})

            mu_b = pc_b.mean("lead")
            sigma_b = xr.DataArray(np.cov(pc_b.T))

            # P_x init distribution
            pc_x = solver.projectField(
                anom_x.sel(init=init, lead=lead).drop_vars("lead").rename(
                    {"member": "time"}),
                neofs=neofs,
                eofscaling=0,
                weighted=False,
            ).rename({"time": "lead"})

            mu_x = pc_x.mean("lead")
            sigma_x = xr.DataArray(np.cov(pc_x.T))

            r, d, s = _relative_entropy_formula(sigma_b, sigma_x, mu_x, mu_b,
                                                neofs)

            rl.append(r)
            sl.append(s)
            dl.append(d)

        re_leadtime_list.append(
            xr.Dataset({
                "R": ("lead", rl),
                "S": ("lead", sl),
                "D": ("lead", dl)
            }))

    re = xr.concat(re_leadtime_list, dim="init").assign(init=inits, lead=leads)

    return re
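
Note: _relative_entropy_formula is not shown in this snippet. Judging from
its arguments (sigma_b, sigma_x, mu_x, mu_b, neofs), it presumably implements
the relative entropy of two N-dimensional Gaussian distributions (Kleeman,
2002), with the usual signal/dispersion decomposition:

R = \frac{1}{2}\left[\ln\frac{\det\Sigma_b}{\det\Sigma_x}
    + \operatorname{tr}\!\left(\Sigma_b^{-1}\Sigma_x\right) - N
    + (\mu_x - \mu_b)^{\top}\Sigma_b^{-1}(\mu_x - \mu_b)\right] = D + S,

where the signal S = \frac{1}{2}(\mu_x - \mu_b)^{\top}\Sigma_b^{-1}(\mu_x - \mu_b)
measures the shift of the ensemble mean and the dispersion D collects the
covariance terms.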
Code example #4
def main(args):
    # environmental constants
    if platform.system() == 'Windows':
        in_dir = '../examples/'
        out_dir = '../regressors/'
        reg_dir = '../regressors/'  # ${in_dir}'regresory_2013/'
        nc_gen = True
        pdf_gen = False
        plus = ''
    else:
        n_samples = int(os.environ['n_samples'])
        in_dir = os.environ['in_dir']
        # out_dir = os.environ['out_dir']
        reg_dir = os.environ['reg_dir']
        # note: environment variables are strings, so e.g. 'False' is truthy
        pdf_gen = os.environ['pdf_gen']
        nc_gen = os.environ['nc_gen']

    what_re = args.what_re
    vari = args.vari
    i_year = args.i_year
    s_year = args.s_year
    e_year = args.e_year
    in_file_name = args.in_file_name

    if args.verbose:
        print('dataset: ', what_re)
        print('variable: ', vari)
        print('initial year of dataset: ', i_year)
        print('initial year of analysis: ', s_year)
        print('end year of analysis: ', e_year)
        print('input filename: ', in_file_name)


    print('data opening')
    in_netcdf = in_dir + in_file_name
    print(in_netcdf)
    ds = xr.open_dataset(in_netcdf)
    #print(ds)
    lat_name = fce.get_coords_name(ds, 'latitude')
    lat = ds.coords[lat_name]
    nlat = lat.shape[0]

    lev_name = fce.get_coords_name(ds, 'pressure')
    if ds.coords[lev_name].attrs['units'] == 'Pa':
        lev = ds.coords[lev_name] / 100.
        ds[lev_name] = lev
    else:
        lev = ds.coords[lev_name]

    
    n = ds.coords['time'].shape[0]
    # the field may be 3-D (longitude missing)
    try:
        lon_name = fce.get_coords_name(ds, 'longitude')
        lon = ds.coords[lon_name]
        nlon = lon.shape[0]
    except Exception:
        nlon = 1

    #print nlat, nlev, n, nlon

    #zonal mean
    if nlon != 1:
        uwnd = ds[vari].mean(lon_name)
    else:
        uwnd = ds[vari]
      
    # equatorial average and level selection
    sel_dict = {lev_name: fce.coord_Between(lev, 10, 50),
                lat_name: fce.coord_Between(lat, -10, 10)}
    zm_u = uwnd.sel(**sel_dict).mean(lat_name)
    # period selection
    times = pd.date_range(str(s_year) + '-01-01', str(e_year) + '-12-31',
                          name='time', freq='M')
    zm_u_sel = zm_u.sel(time=times, method='ffill')  # nearest
    #remove seasonality
    climatology = zm_u_sel.groupby('time.month').mean('time')
    anomalies = zm_u_sel.groupby('time.month') - climatology

    #print anomalies
    #sys.exit()
    
    # additional constants
    npca = 30
    norm = 2   # 5
    norms = 3  # 5
    what_sp = ''  # what solar proxy?

    print("regressors' openning")
    global reg#, reg_names, nr
    reg, reg_names, history = fce.configuration_ccmi(what_re, what_sp, norm, 'no_qbo' , i_year, s_year, e_year, reg_dir)
    nr = reg.shape[1]
    # print(anomalies)
    # remove known variability by MLR; work with the residuals
    stacked = anomalies.stack(allpoints=[lev_name])
    stacked = stacked.reset_coords(drop=True)
    resids = stacked.groupby('allpoints').apply(xr_regression)
    resids = resids.rename({'dim_0': 'time'})
    resids['time'] = times
    #EOF analysis
    solver = Eof(resids.T, weights=None) 
    #sys.exit()

    #coslat = np.cos(np.deg2rad(lat)).clip(0.,1.)
    #wgts = np.sqrt(coslat)[np.newaxis,...]   

    for i in range(npca):
        var_eofs = solver.varianceFraction(neigs=i)
        # print(var_eofs)
        if np.sum(var_eofs) > 0.95:
            npca = i
            total_variance = np.sum(var_eofs)
            print(total_variance * 100, '% based on', i, 'components')
            break

    var_eofs = solver.varianceFraction(neigs=npca)
    pcs = solver.pcs(npcs=npca, pcscaling=1)
    nte = solver.northTest(neigs=npca, vfscaled=True)

    subdir = './'
    if pdf_gen:
        fig = plt.figure(figsize=(11,8))
        ax1 = fig.add_subplot(111)
        ax1.set_title(str(npca)+' PCAs cover '+str(np.round(total_variance*100, 2))+'% of total variance')
        for i in range(npca):
            # plotting
            pcs[:, i].plot(linewidth=2, ax=ax1, label='pca ' + str(i + 1))

        ax1.set_xlabel('time [years]')
        ax1.set_ylabel('QBO index')
        ax1.set_title('')
        ax1.legend(loc='best')
        plt.savefig(reg_dir + 'qbo_' + what_re + '_pcas.pdf',
                    bbox_inches='tight')
        plt.close(fig)
                
    if nc_gen:
        # save each PC to its own netCDF file
        for i in range(npca):
            pcs_ds = pcs[:, i].to_dataset(name='index')
            # pripona_nc is a filename suffix (presumably '.nc') defined
            # elsewhere in the project
            pcs_ds.to_netcdf(reg_dir + r'qbo_' + what_re + '_pc' +
                             str(i + 1) + pripona_nc)
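
As an aside, the variance-threshold scan in the loop above has a compact
equivalent using cumulative variance fractions; a sketch, not from the
original project (same solver, same 0.95 threshold):

cumvar = np.cumsum(solver.varianceFraction().values)
# smallest number of modes whose variance fractions sum to more than 0.95,
# capped at the original maximum of 30
npca = min(int(np.searchsorted(cumvar, 0.95, side='right')) + 1, 30)
total_variance = cumvar[npca - 1]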
Code example #5
def eof_orca_latlon_box(run, var, modes, lon_bnds, lat_bnds, pathfile, plot,
                        time, eoftype):

    # map the short variable name to the netCDF variable name
    if var == 'temp':
        key = 'votemper'
    elif var == 'sal':
        key = 'vosaline'
    elif var == 'MLD':
        key = 'somxl010'
    key1 = key

    # read data
    ds = xr.open_dataset(pathfile)
    #ds["time_counter"] = ds['time_counter']+(np.datetime64('0002-01-01')-np.datetime64('0001-01-01'))

    if time == 'comparison':
        ds = ds.sel(time_counter=slice('1958-01-01', '2006-12-31'))

    # cut box for EOF at surface
    if var == 'MLD':
        data = ds[key].sel(lon=slice(lon_bnds[0], lon_bnds[1]),
                           lat=slice(lat_bnds[0], lat_bnds[1]))
        #data = cut_latlon_box(ds[key][:,:,:],ds.lon,ds.lat,
        # lon_bnds,lat_bnds)
    else:
        data = ds[key][:, 0, :, :].sel(lon=slice(lon_bnds[0], lon_bnds[1]),
                                       lat=slice(lat_bnds[0], lat_bnds[1]))
        #data = cut_latlon_box(ds[key][:,0,:,:],ds.lon,ds.lat,
        # lon_bnds,lat_bnds)
    data = data.to_dataset()
    # detrend data
    data[key1] = (['time_counter', 'lat', 'lon'],
                  signal.detrend(data[key].fillna(0), axis=0, type='linear'))

    #data=data.where(data!=0)

    # remove the seasonal cycle and drop coordinates that otherwise make
    # the EOF solver fail
    if 'time_centered' in list(data.coords):
        data = deseason_month(data).drop('month').drop('time_centered')
    else:
        data = deseason_month(data).drop('month')

    # set 0 values back to nan
    data = data.where(data != 0)

    # EOF analysis
    # Square-root of cosine of latitude weights are applied before the
    # computation of EOFs.
    coslat = np.cos(np.deg2rad(data['lat'].values))
    coslat, _ = np.meshgrid(coslat, np.arange(0, len(data['lon'])))
    wgts = np.sqrt(coslat)
    solver = Eof(data[key], weights=wgts.transpose())
    pcs = solver.pcs(npcs=modes, pcscaling=1)
    if eoftype == 'correlation':
        eof = solver.eofsAsCorrelation(neofs=modes)
    elif eoftype == 'covariance':
        eof = solver.eofsAsCovariance(neofs=modes)
    else:
        eof = solver.eofs(neofs=modes)
    varfr = solver.varianceFraction(neigs=4)
    print(varfr)

    #----------- Plotting --------------------
    plt.close("all")
    if plot == 1:
        for i in np.arange(0, modes):
            fig = plt.figure(figsize=(8, 2))
            ax1 = fig.add_axes([0.1, 0.1, 0.3, 0.9],
                               projection=ccrs.PlateCarree())  # main axes
            ax1.set_extent(
                (lon_bnds[0], lon_bnds[1], lat_bnds[0], lat_bnds[1]))
            # discrete colormap ('inferno' would be similar to cmo 'thermal')
            cmap = plt.get_cmap('RdYlBu', len(np.arange(10, 30)) - 1)
            eof[i, :, :].plot(ax=ax1,
                              cbar_kwargs={'label': 'Correlation'},
                              transform=ccrs.PlateCarree(),
                              x='lon',
                              y='lat',
                              add_colorbar=True,
                              cmap=cmap)
            gl = map_stuff(ax1)
            gl.xlocator = mticker.FixedLocator([100, 110, 120])
            gl.ylocator = mticker.FixedLocator(np.arange(-35, -10, 5))
            plt.text(116,
                     -24,
                     str(np.round(varfr[i].values, decimals=2)),
                     horizontalalignment='center',
                     verticalalignment='center',
                     transform=ccrs.PlateCarree(),
                     fontsize=8)

            ax2 = fig.add_axes([0.5, 0.1, 0.55, 0.9])  # main axes
            plt.plot(pcs.time_counter,
                     pcs[:, i].values,
                     linewidth=0.1,
                     color='k')
            anomaly(ax2, pcs.time_counter.values, pcs.values[:, i], [0, 0])
            ax2.set_xlim(
                [pcs.time_counter[0].values, pcs.time_counter[-1].values])
            # pathplots is an output directory defined elsewhere in the
            # project
            plt.savefig(pathplots + 'eof_as' + eoftype + '_mode' + str(i) +
                        '_' + time + '_' + run + '_' + var + '.png',
                        dpi=300,
                        bbox_inches='tight',
                        pad_inches=0.1)
            plt.show()
    #----------------------------------------------

    return pcs, eof, varfr
Code example #6
def calcEOF(xrdata, data_var, w, wei=True):
    """
    input:
        xrdata: xarray Dataset
        data_var: string. Variable name to use on EOF.
        w: string. Coordinate to use for the weights, e.g. 'latitude'.
        wei: bool. If True (default), apply sqrt-of-cos(latitude) weights.

        use as:
            solver, eof1, var1 = calcEOF(xrdata, 'data_var', 'latitude')
    """
    xrdata = xrdata - xrdata[data_var].mean(dim="time")

    # Testing if we can select data from level, lat and time
    try:
        xrdata = xrdata.sel(level=1000,
                            latitude=slice(90, 20),
                            time=slice("1979-01-01", "2000-12-31"))
        print(
            'Data selection OK on first try. Level, lat and time slice done.')
    except ValueError:
        try:
            print('valueError: Trying next')
            xrdata = xrdata.sel(level=1000,
                                lat=slice(90, 20),
                                time=slice("1979-01-01", "2000-12-31"))
            print(
                'Data selection OK on second try. Level, lat and time slice done.'
            )
        except ValueError:
            try:
                print('valueError: Trying next')
                xrdata = xrdata.sel(latitude=slice(90, 20),
                                    time=slice("1979-01-01", "2000-12-31"))
                print('Data selection OK on third try. No level cut')
            except ValueError:
                try:
                    print('valueError: Trying next')
                    xrdata = xrdata.sel(time=slice("1979-01-01", "2000-12-31"))
                    print('Data selection OK on fourth try. Only time slice.')
                except Exception:
                    raise TypeError('Data out of limits')

    xrdata = (xrdata.groupby('time.month') -
              xrdata[data_var].groupby('time.month').mean())
    #  To ensure equal area weighting for the covariance matrix,
    # the gridded data is weighted by the square root of the cosine of
    # latitude. - NOAA

    if wei:
        coslat = np.cos(np.deg2rad(xrdata.coords[w].values)).clip(0., 1.)
        # '...' (Ellipsis) stands for all existing axes; together with
        # np.newaxis it appends a trailing axis of length 1, so the 1-D
        # latitude weights broadcast across longitude.
        wgts = np.sqrt(coslat)[..., np.newaxis]
        # The EOF analysis is handled by a solver class, and the EOF solution
        # is computed when the solver class is created. Method calls are then
        # used to retrieve the quantities of interest from the solver class.
        # center=False would skip removing the mean from the data, e.g.:
        # solver = Eof(m_anomalie.hgt, weights=wgts, center=False)
        #
        # eofsAsCovariance returns the EOFs expressed as the covariance
        # between each PC and the input data at each point in space. They are
        # not the EOFs themselves; they show how each point in space varies
        # with the given mode. The eofs method provides the raw EOFs
        # (eigenvectors of the covariance matrix), the spatial patterns of
        # which the PCs are the coefficients.
        # "The covariance matrix is used for the EOF analysis." - NOAA
        solver = Eof(xrdata[data_var], weights=wgts)
    else:
        solver = Eof(xrdata[data_var])
    # solver = Eof(s_anomalie.hgt, weights=wgts, center=False)
    # Retrieve the leading EOF, expressed as the covariance between the
    # leading PC time series and the input anomalies at each grid point.
    eof1 = solver.eofsAsCovariance(pcscaling=1)
    var1 = solver.varianceFraction().sel(mode=0)

    return solver, eof1, var1
Code example #7
import numpy as np
import xarray as xray
import matplotlib.pyplot as plt
from eofs.xarray import Eof

# umeana/vmeana (anomalies) and umean/vmean are velocity arrays defined
# earlier in the notebook
klrat = {}
for ii in range(8):
    covu = np.cov(umeana[ii, :] ** 2)
    covv = np.cov(vmeana[ii, :] ** 2)
    covuv = np.cov(umeana[ii, :] * vmeana[ii, :])

    # principal-axis (maximum) variance of the 2x2 covariance matrix
    varmax = 0.5 * (covu + covv + np.sqrt((covu - covv) ** 2 + 4 * covuv ** 2))
    # the eigenvalues sum to the trace, so the minor-axis variance is
    varmin = covu + covv - varmax

    uvmat = xray.concat((umeana, vmeana), dim='uv')
    solver = Eof(uvmat.isel(distance=ii).T)

    evec1x = solver.eofs(neofs=1)[0][0].values
    evec1y = solver.eofs(neofs=1)[0][1].values

    # orientation of the leading EOF (principal axis)
    maxtheta_rad = np.arctan(evec1y / evec1x)
    maxtheta = maxtheta_rad * 180 / np.pi

    # 95% confidence ellipse axes (chi-square with 2 dof: 5.991)
    eig1 = solver.eigenvalues()[0].values
    eig2 = solver.eigenvalues()[1].values
    maxvar = 2 * np.sqrt(5.991 * eig1)
    minvar = 2 * np.sqrt(5.991 * eig2)

    fig, ax = plt.subplots(subplot_kw={'aspect': 'equal'})
    ax.hist2d(umean[ii, :], vmean[ii, :], 30)
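
As a quick cross-check (not part of the original notebook): varmax and varmin
above are exactly the eigenvalues of the 2x2 velocity covariance matrix, so
NumPy should reproduce them from the loop variables covu, covv and covuv:

import numpy as np

cov_mat = np.array([[float(covu), float(covuv)],
                    [float(covuv), float(covv)]])
lam_min, lam_max = np.linalg.eigvalsh(cov_mat)  # ascending order
assert np.isclose(lam_max, varmax) and np.isclose(lam_min, varmin)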
Code example #8
def eof_analyse(xarray_DataArray, neofs):
    """Return the first neofs EOFs of an anomaly DataArray."""
    solver = Eof(xarray_DataArray)
    # the first positional argument of Eof.eofs is eofscaling, so neofs
    # must be passed by keyword
    eofs = solver.eofs(neofs=neofs)
    return eofs
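
A minimal usage sketch for eof_analyse on synthetic data (the array sizes are
illustrative only; it assumes `from eofs.xarray import Eof` is in scope and
that the input has a leading dimension named 'time'):

import numpy as np
import xarray as xr

da = xr.DataArray(np.random.randn(24, 10, 20),
                  dims=('time', 'lat', 'lon'))
leading = eof_analyse(da, neofs=3)
print(leading.dims)  # ('mode', 'lat', 'lon')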
Code example #9
File: eof.py  Project: csiro-dcfp/matear-nb
import cartopy.crs as ccrs
import matplotlib.pyplot as plt
import numpy as np
import xarray as xr

from eofs.xarray import Eof
from eofs.examples import example_data_path

# Read SST anomalies using the xarray module. The file contains November-March
# averages of SST anomaly in the central and northern Pacific.
filename = example_data_path('sst_ndjfm_anom.nc')
sst = xr.open_dataset(filename)['sst']

# Create an EOF solver to do the EOF analysis. Square-root of cosine of
# latitude weights are applied before the computation of EOFs.
coslat = np.cos(np.deg2rad(sst.coords['latitude'].values))
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(sst, weights=wgts)

# Retrieve the leading EOF, expressed as the correlation between the leading
# PC time series and the input SST anomalies at each grid point, and the
# leading PC time series itself.
eof1 = solver.eofsAsCorrelation(neofs=1)
pc1 = solver.pcs(npcs=1, pcscaling=1)

# Plot the leading EOF expressed as correlation in the Pacific domain.
clevs = np.linspace(-1, 1, 11)
ax = plt.axes(projection=ccrs.PlateCarree(central_longitude=190))
fill = eof1[0].plot.contourf(ax=ax,
                             levels=clevs,
                             cmap=plt.cm.RdBu_r,
                             add_colorbar=False,
                             transform=ccrs.PlateCarree())
Code example #10
def LFCA(da, N=30, L=1/10, fs=12, order=3, landmask=None, monthly=True):
    """Perform LFCA (as per Wills et al, 2018, GRL) on a dataarray.

    Parameters
    ----------
    da : xarray.DataArray
        Data to perform LFCA on (time x lat x lon)
    N : int
        Number of EOFs to retain
    L : float
        Cutoff frequency for lowpass filter (e.g. 1/10 for per decade)
    fs : float
        Sampling frequency in samples per year (e.g. 12 for monthly data);
        reset to 1 internally when monthly=True, since each calendar month
        is then filtered as its own annually sampled series
    order : int
        Order of the Butterworth filter
    landmask : xarray.DataArray or None
        If None, do not perform any masking
        If DataArray, indicates land locations
    monthly : bool
        If True, perform lowpass filtering for each month separately

    Returns
    -------
    LFPs : numpy.ndarray
        2D array of N spatial patterns (nlat*nlon x N)
    LFCs : numpy.ndarray
        2D array of N time series (ntime x N)

    """

    from eofs.xarray import Eof
    from scipy.signal import butter, sosfiltfilt

    # remove empirical seasonal cycle
    da = da.groupby('time.month') - da.groupby('time.month').mean('time')

    ntime, nlat, nlon = da.shape

    if landmask is not None:

        # expand land mask to ntime
        lnd_mask = np.repeat(landmask.values[np.newaxis, :, :], ntime, axis=0)
        da = da.where(lnd_mask)

    coslat = np.cos(np.deg2rad(da['lat'].values)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(da, weights=wgts)

    eofs = solver.eofs(eofscaling=0)  # normalized st L2 norm = 1
    eigenvalues = solver.eigenvalues()

    # Low pass filter data
    if monthly:
        fs = 1

    nyq = 0.5 * fs  # Nyquist frequency
    low = L / nyq
    sos = butter(order, low, btype='low', output='sos')  # Coefficients for Butterworth filter
    if monthly:
        X_tilde = np.empty(da.shape)
        for kk in range(12):
            X_tilde[kk::12, :, :] = sosfiltfilt(sos, da.values[kk::12, :, :],
                                                padtype='even', axis=0)

    else:
        X_tilde = sosfiltfilt(sos, da.values, axis=0)

    a_k = eofs.values[:N, :, :].reshape((N, nlat*nlon))
    sigma_k = np.sqrt(eigenvalues.values[:N])

    if landmask is not None:
        lnd_mask_vec = landmask.values.flatten()
    else:
        lnd_mask_vec = np.ones((nlat*nlon,), dtype=bool)

    PC_tilde = np.empty((ntime, N))
    for kk in range(N):
        PC_tilde[:, kk] = 1/sigma_k[kk]*np.dot(X_tilde.reshape((ntime, nlat*nlon))[:, lnd_mask_vec],
                                               a_k[kk, lnd_mask_vec])

    R = np.dot(PC_tilde.T, PC_tilde)/(N - 1)
    # R is symmetric, so use eigh and sort by descending eigenvalue
    R_eigvals, e_k = np.linalg.eigh(R)
    order = np.argsort(R_eigvals)[::-1]
    R_eigvals, e_k = R_eigvals[order], e_k[:, order]

    # eigenvectors are the columns of e_k
    u_k = np.dot((a_k.T) / sigma_k, e_k)
    LFPs = np.dot(sigma_k * (a_k.T), e_k)

    # Time series:
    LFCs = np.dot(da.values.reshape((ntime, nlat*nlon))[:, lnd_mask_vec], u_k[lnd_mask_vec, :])

    return LFPs, LFCs
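
A minimal usage sketch for LFCA on synthetic monthly data (all names and
sizes are illustrative, not from the original source):

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('2000-01-01', periods=240, freq='MS')
da = xr.DataArray(np.random.randn(240, 12, 24),
                  coords={'time': time,
                          'lat': np.linspace(-60, 60, 12),
                          'lon': np.linspace(0, 355, 24)},
                  dims=('time', 'lat', 'lon'))
LFPs, LFCs = LFCA(da, N=10)
print(LFPs.shape, LFCs.shape)  # (nlat*nlon, N) and (ntime, N)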
Code example #11
File: eof.py  Project: zexuanxu/climate-data-science
import numpy as np
import xarray as xr
# assumed: the 'plot' namespace used below matches proplot's
# subplots/format API
import proplot as plot

from esmtools.stats import *
from eofs.xarray import Eof


# --- read netcdf file
dset = xr.open_dataset('asstdt_pacific.nc')

# --- select djf months
sst = dset['sst'].sel(time=np.in1d(dset['time.month'], [1, 2, 12]))

# --- square-root of cosine of latitude weights
coslat = np.cos(np.deg2rad(sst.coords['lat'].values))
wgts = np.sqrt(coslat)[..., np.newaxis]
# --- eof solver
solver = Eof(sst, weights=wgts)
# --- eof results
eofs = solver.eofsAsCorrelation(neofs=2)
pcs = solver.pcs(npcs=2, pcscaling=1)
variance_fractions = solver.varianceFraction()
north_test = solver.northTest(vfscaled=True)


# --- spatial patterns
fig, ax = plot.subplots(axwidth=5, nrows=2, tight=True, proj='pcarree',
                        proj_kw={'lon_0': 180})
# --- format options
ax.format(land=False, coast=True, innerborders=True, borders=True,
          large='15px', labels=False,
          latlim=(31, -31), lonlim=(119, 291),
          geogridlinewidth=0)
Code example #12
# latts, dlats, dlons and the fields Q_s, Qr, sst, tendsst are defined
# earlier in the script
r = 6.371 * 10**6  # Earth radius in metres
coslats = np.cos((np.pi / 180) * latts)
coslats = coslats[:-1, :-1]
areas = r**2 * coslats * dlats * dlons

Q_s = Q_s[:, :-1, :-1]
Qr = Qr[:, :-1, :-1]
sst = sst[:, :-1, :-1]
tendsst = tendsst[:, :-1, :-1]

tot_area = np.sum(areas)

weights = areas / tot_area

# Calculate EOFs for SST, Qs and Qo. Should weight by area?
solver = Eof(sst, weights=weights)
sst_eof = solver.eofs(neofs=3, eofscaling=2)

sst_eof_varfracs = solver.varianceFraction()

solver = Eof(Qr, weights=weights)
Qo_eof = solver.eofs(neofs=3, eofscaling=2)
Qo_pc = solver.pcs(npcs=3, pcscaling=2)

Qo_eof_varfracs = solver.varianceFraction()

Qo_rec = solver.reconstructedField(5)

Qo_rec_var = Qo_rec.var(dim='time')

# get projection (pseudo-PCs) associated with Qo EOFs
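
The snippet ends at the comment above. A one-line sketch of the announced
projection step (assuming the intent is to project Q_s onto the Qo EOFs,
which the surrounding code does not confirm):

Qs_pseudo_pc = solver.projectField(Q_s, neofs=3)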
Code example #13
File: processing.py  Project: nasa-nccs-cds/edask
@classmethod
def orthoModes(cls, data, nModes):
    # type: (np.ndarray, int) -> np.ndarray
    # positional args are (dataset, weights, center, ddof) in the eofs
    # standard (numpy) interface
    eof = Eof(data, None, False, False)
    result = eof.eofs(0, nModes)  # type: np.ndarray  (eofscaling=0)
    # normalize the amplitude of the modes
    result = result / (np.std(result) * math.sqrt(data.shape[1]))
    return result.transpose()
Code example #14
File: NPGO_Index.py  Project: bradyrx/public_code
def main():
    ens = sys.argv[1]
    sYear = sys.argv[2]
    eYear = sys.argv[3]
    if int(sYear) < 1920:
        raise ValueError("Starting year must be 1920 or later.")
    if int(eYear) > 2100:
        raise ValueError("End year must be 2100 or earlier.")
    print("Computing NPGO for ensemble number " + ens + "...")
    filepath = ('/glade/scratch/rbrady/EBUS_BGC_Variability/' +
                'global_residuals/SST/remapped/remapped.SST.' + ens +
                '.192001-210012.nc')
    print("Global residuals loaded...")
    ds = xr.open_dataset(filepath)
    ds = ds['SST'].squeeze()
    # Make time dimension readable through xarray.
    ds['time'] = pd.date_range('1920-01', '2101-01', freq='M')
    # Reduce to time period of interest.
    ds = ds.sel(time=slice(sYear + '-01', eYear + '-12'))
    # Slice down to Northeast Pacific domain.
    ds = ds.sel(lat=slice(25, 62), lon=slice(180, 250))
    # Take annual JFM means.
    month = ds['time.month']
    JFM = (month <= 3)
    ds_winter = ds.where(JFM).resample(time='A').mean('time')
    # Compute EOF
    coslat = np.cos(np.deg2rad(ds_winter.lat.values))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(ds_winter, weights=wgts, center=False)
    print("NPGO computed.")
    eof = solver.eofsAsCorrelation(neofs=2)
    variance = solver.varianceFraction(neigs=2)
    # Reconstruct the monthly index of SSTa by projecting
    # these values onto the annual PC timeseries.
    pseudo_pc = solver.projectField(ds, neofs=2, eofscaling=1)
    # Set up as dataset.
    ds = eof.to_dataset()
    ds['pc'] = pseudo_pc
    ds['variance_fraction'] = variance
    ds = ds.rename({'eofs': 'eof'})
    # modes are 0-indexed, so mode=1 selects the second EOF (the NPGO)
    ds = ds.sel(mode=1)
    # Sign convention: keep the EOF negative at the bullseye (45.5N, 150W);
    # otherwise invert both the EOF and the PC.
    if ds.sel(lat=45.5, lon=210).eof >= 0:
        ds['eof'] = ds['eof'] * -1
        ds['pc'] = ds['pc'] * -1
    # Change some attributes for the variables.
    ds['eof'].attrs['long_name'] = 'Correlation between PC and JFM SSTa'
    ds['pc'].attrs['long_name'] = 'Principal component for NPGO'
    # Add a description of methods for clarity.
    ds.attrs['description'] = ('Second mode of JFM SSTa variability over '
                               '25-62N and 180-110W.')
    ds.attrs['anomalies'] = ('Anomalies were computed by removing the '
                             'ensemble mean at each grid cell.')
    ds.attrs['weighting'] = (
        'The native grid was regridded to a standard 1deg x 1deg (180x360) '
        'grid. Weighting was computed via the sqrt of the cosine of '
        'latitude.')
    print("Saving to netCDF...")
    ds.to_netcdf('/glade/p/work/rbrady/NPGO/NPGO.' + ens + '.' + str(sYear) +
                 '-' + str(eYear) + '.nc')
Code example #15
# Beginning
# m_anomalie is a monthly-anomaly Dataset defined earlier in the project.

# Create an EOF solver to do the EOF analysis. Square-root of cosine of
# latitude weights are applied before the computation of EOFs, scaling the
# values to avoid overrepresented areas.
# np.clip: given an interval, values outside the interval are clipped to
# the interval edges. For example, if an interval of [0, 1] is specified,
# values smaller than 0 become 0, and values larger than 1 become 1.
coslat = np.cos(np.deg2rad(m_anomalie.coords['lat'].values)).clip(0., 1.)
# '...' (Ellipsis) stands for all existing axes; with np.newaxis it appends
# a trailing axis of length 1 so the latitude weights broadcast across
# longitude.
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(m_anomalie.hgt, weights=wgts)

# Retrieve the leading EOFs, expressed as the covariance between the leading
# PC time series and the input height anomalies at each grid point.
eof1 = solver.eofsAsCovariance(neofs=2)

# Plot the leading EOF expressed as covariance in the European/Atlantic domain.
clevs = np.linspace(-75, 75, 11)
proj = ccrs.Orthographic(0, 90)
ax = plt.axes(projection=proj)
ax.coastlines()
ax.set_global()
eof1.sel(mode=0).plot.contourf(ax=ax,
                               levels=clevs,
                               cmap=plt.cm.RdBu_r,
                               transform=ccrs.PlateCarree())