Example #1
def Decompose(self):
    '''perform EOF decomposition'''
    solver = Eof(self.rawdata)
    var_frac = solver.varianceFraction()
    cumvar = np.cumsum(var_frac.values)
    self.npcs = np.where(cumvar >= self.prop_variance)[0].min()
    self.pc = solver.pcs(npcs=self.npcs)  # time series of PC
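A self-contained sketch of the same cumulative-variance truncation on synthetic data (names, sizes, and the 90% threshold are illustrative, not taken from the original class). Note that np.where(...)[0].min() returns a zero-based mode index, so the sketch adds one to turn it into a count of modes.

import numpy as np
import pandas as pd
import xarray as xr
from eofs.xarray import Eof

# synthetic (time, lat, lon) anomaly field
data = xr.DataArray(
    np.random.randn(120, 20, 30),
    dims=("time", "lat", "lon"),
    coords={"time": pd.date_range("2000-01-01", periods=120, freq="MS")},
)

solver = Eof(data)
cumvar = np.cumsum(solver.varianceFraction().values)
npcs = int(np.where(cumvar >= 0.90)[0].min()) + 1  # first index reaching 90%, +1 to get a count
pcs = solver.pcs(npcs=npcs)                        # PC time series, shape (time, mode)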
Example #2
def test_eof_values(shape, n_modes, weight, wrap):
    """Test values relative to Eof package"""
    data = example_da(shape, wrap=wrap)
    lat_dim = f"dim_{len(shape)-1}"
    xeof.core.LAT_NAME = lat_dim
    sensor_dims = [f"dim_{i}" for i in range(1, len(shape))]

    if weight == "none":
        weights = None
    elif weight == "sqrt_cos_lat":
        weights = np.cos(data[lat_dim] * np.pi / 180)**0.5
    elif weight == "random":
        weights = data.isel(time=0).copy()
        weight = weights.compute()  # hand the concrete weights array to eof() below (overrides the string)

    res = eof(
        data,
        sensor_dims=sensor_dims,
        sample_dim="time",
        weight=weight,
        n_modes=n_modes,
        norm_PCs=False,
    )

    Eof_solver = Eof(data, weights=weights, center=False)
    ver_pcs = Eof_solver.pcs(pcscaling=0, npcs=n_modes)
    ver_eofs = Eof_solver.eofs(eofscaling=0, neofs=n_modes)
    ver_EV = Eof_solver.varianceFraction(neigs=n_modes)

    npt.assert_allclose(abs(res["pc"]), abs(ver_pcs))
    npt.assert_allclose(abs(res["eof"]), abs(ver_eofs))
    npt.assert_allclose(res["explained_var"], ver_EV)
Example #3
 def processInputCrossSection(self, request: TaskRequest, node: OpNode,
                              inputDset: EDASDataset) -> EDASDataset:
     nModes = int(node.getParm("modes", 16))
     center = str(node.getParm("center", "false")).lower() == "true"  # bool("false") is True, so parse the string explicitly
     merged_input_data, info = self.get_input_array(inputDset)
     shapes = info['shapes']
     slicers = info['slicers']
     solver = Eof(merged_input_data, center=center)
     results = []
     for iMode, eofs_result in enumerate(solver.eofs(neofs=nModes)):
         for iVar, eofs_data in enumerate(
                 self.getResults(eofs_result, slicers, shapes)):
             input = inputDset.inputs[iVar]
             results.append(
                 EDASArray("-".join(["eof-", str(iMode), input.name]),
                           input.domId, eofs_data))
     pcs_result = solver.pcs(npcs=nModes)
     pcs = EDASArray(
         "pcs[" + inputDset.id + "]", inputDset.inputs[0].domId,
         EDASArray.cleanupCoords(pcs_result, {
             "mode": "m",
             "pc": "m"
         }).transpose())
     results.append(pcs)
     fracs = solver.varianceFraction(neigs=nModes)
     pves = [str(round(float(frac * 100.), 1)) + '%' for frac in fracs]
     for result in results:
         result["pves"] = str(pves)
     return EDASDataset.init(self.renameResults(results, node),
                             inputDset.attrs)
Example #4
def get_eofs(x):
    import numpy as np
    import xarray
    from eofs.xarray import Eof
    from matplotlib import pyplot as plt

    coslat = np.cos(np.deg2rad(x.lat)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]

    from eddof import get_eddof
    DF = np.empty(np.shape(x[0, :, :]))
    for i in range(0, len(x.lat)):
        for j in range(0, len(x.lon)):
            DF[i, j] = get_eddof(x[:, i, j].values)
    edof = np.mean(DF)
    print(edof)
    solver = Eof(x, weights=wgts, ddof=edof)

    var = solver.varianceFraction()
    plt.figure(1)
    plt.bar(np.arange(0, len(var), 1), var * 100)
    plt.show()
    plt.close()

    n = int(input('How many PCs to extract: '))
    eof = solver.eofs(neofs=n, eofscaling=2)
    pc = solver.pcs(npcs=n, pcscaling=1)
    vf = var[:n]

    EOFs = [eof, pc, vf]
    return EOFs
Example #5
def plot_pca_analysis(ds, fig_output_path, title=''):
    print(title)
    # var = "T"
    print('done load')

    nbpcs = 3

    solver = Eof(ds.dropna(dim="time", how="all"))
    pcas = solver.pcs(npcs=nbpcs, pcscaling=1)
    eofs = solver.eofs(neofs=nbpcs, eofscaling=1)

    fig, axes = plt.subplots(5, 2, figsize=(20, 20))
    fig.suptitle(title, fontsize=12)

    pcas.plot.line(ax=axes[0, 0], x='time')

    pcas.resample(time='M').mean('time').plot.line(ax=axes[1, 0], x='time')
    axes[1, 0].set_title('Monthly mean')

    pcas.resample(time='Y').mean('time').plot.line(ax=axes[2, 0], x='time')
    axes[2, 0].set_title('Annual mean')

    pcas.groupby('time.month').mean('time').plot.line(ax=axes[3, 0], x='month')
    axes[3, 0].set_title('By Month')

    pcas.groupby('time.hour').mean('time').plot.line(ax=axes[4, 0], x='hour')
    axes[4, 0].set_title('By Hour')

    for pc in range(nbpcs):
        # eofs.isel(mode=pc).plot(ax=axes[pc, 1])
        eofs.to_dataframe().unstack().T.loc[:, pc].plot.bar(ax=axes[pc, 1])

    solver.varianceFraction().isel(mode=slice(0, nbpcs)).plot(ax=axes[3, 1])

    plt.tight_layout()
    fig.suptitle(title)
    plt.savefig(fig_output_path + title + '.pdf', bbox_inches='tight')
Example #6
    def EOF_SST_analysis(self, xa, weights, n=1, fn=None):
        """ Empirical Orthogonal Function analysis of SST(t,x,y) field; from `SST.py` """
        assert type(xa)==xr.core.dataarray.DataArray
        assert type(weights)==xr.core.dataarray.DataArray
        assert 'time' in xa.dims
        assert np.shape(xa[0,:,:])==np.shape(weights)

        # anomalies by removing time mean
        xa = xa - xa.mean(dim='time')
        # Retrieve the leading EOF, expressed as the covariance between the leading PC
        # time series and the input xa anomalies at each grid point.
        solver = Eof(xa, weights=weights)
        eofs = solver.eofsAsCovariance(neofs=n)
        pcs  = solver.pcs(npcs=n, pcscaling=1)
        eigs = solver.eigenvalues(neigs=n)
        varF = solver.varianceFraction(neigs=n)
        ds = xr.merge([eofs, pcs, eigs, varF])
        if fn is not None: ds.to_netcdf(fn)
        return ds
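A hypothetical call of the method above (the file name, the weights choice, and the analysis instance are illustrative assumptions): build sqrt(cos(latitude)) weights on the SST grid and write the leading mode to netCDF.

import numpy as np
import xarray as xr

sst = xr.open_dataarray('sst_monthly.nc')  # assumed (time, lat, lon) DataArray with a 'lat' coordinate
weights = np.sqrt(np.cos(np.deg2rad(sst.lat))).broadcast_like(sst.isel(time=0, drop=True))
ds_eof = analysis.EOF_SST_analysis(sst, weights, n=1, fn='sst_eof1.nc')  # 'analysis' is a hypothetical instance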
Example #7
def reof(stack: xr.DataArray, variance_threshold: float = 0.727, n_modes: int = 4) -> xr.Dataset:
    """Function to perform rotated empirical othogonal function (eof) on a spatial timeseries

    args:
        stack (xr.DataArray): DataArray of spatial temporal values with coord order of (t,y,x)
        variance_threshold (float, optional): fallback threshold used to select the number of eof
            modes to keep. Only used if n_modes is negative. default = 0.727
        n_modes (int, optional): number of eof modes to use. default = 4

    returns:
        xr.Dataset: rotated eof dataset with spatial modes, temporal modes, and mean values
            as variables

    """
    # extract out some dimension shape information
    shape3d = stack.shape
    spatial_shape = shape3d[1:]
    shape2d = (shape3d[0],np.prod(spatial_shape))

    # flatten the data from [t,y,x] to [t,...]
    da_flat = xr.DataArray(
        stack.values.reshape(shape2d),
        coords = [stack.time,np.arange(shape2d[1])],
        dims=['time','space']
    )
    #logger.debug(da_flat)
        
    ## find the temporal mean for each pixel
    center = da_flat.mean(dim='time')
    
    centered = da_flat - center
               
    # get an eof solver object
    # explicitly set center to False since the data have already been centered
    solver = Eof(centered, center=False)

    # check if the n_modes keyword is set to a realistic value
    # if not get n_modes based on variance explained
    if n_modes < 0:
        n_modes = int((solver.varianceFraction().cumsum() < variance_threshold).sum())

    # calculate the spatial eof values
    eof_components = solver.eofs(neofs=n_modes).transpose()
    # get the indices where the eof is valid data
    non_masked_idx = np.where(np.logical_not(np.isnan(eof_components[:,0])))[0]

    # create a "blank" array to set roated values to
    rotated = eof_components.values.copy()

    # # waiting for release of sklearn version >= 0.24
    # # until then have a placeholder function to do the rotation
    # fa = FactorAnalysis(n_components=n_modes, rotation="varimax")
    # rotated[non_masked_idx,:] = fa.fit_transform(eof_components[non_masked_idx,:])

    # apply varimax rotation to eof components
    # placeholder function until sklearn version >= 0.24
    rotated[non_masked_idx,:] = _ortho_rotation(eof_components[non_masked_idx,:])

    # project the original time series data on the rotated eofs
    projected_pcs = np.dot(centered[:,non_masked_idx], rotated[non_masked_idx,:])

    # reshape the rotated eofs to a 3d array of [y,x,c]
    spatial_rotated = rotated.reshape(spatial_shape+(n_modes,))

    # structure the spatial and temporal reof components in a Dataset
    reof_ds = xr.Dataset(
        {
            "spatial_modes": (["lat","lon","mode"],spatial_rotated),
            "temporal_modes":(["time","mode"],projected_pcs),
            "center": (["lat","lon"],center.values.reshape(spatial_shape))
        },
        coords = {
            "lon":(["lon"],stack.lon),
            "lat":(["lat"],stack.lat),
            "time":stack.time,
            "mode": np.arange(n_modes)+1
        }
    )

    return reof_ds
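The _ortho_rotation call above is described as a placeholder for the varimax rotation that scikit-learn >= 0.24 exposes via FactorAnalysis(rotation="varimax"). Below is a sketch of one common varimax implementation that matches the (points, modes) input/output shape convention used above; the helper actually used by this code may differ.

import numpy as np

def _ortho_rotation(components, method='varimax', tol=1e-6, max_iter=100):
    """Rotate a (points, modes) loading matrix and return the rotated loadings."""
    nrow, ncol = components.shape
    rotation_matrix = np.eye(ncol)
    var = 0.0
    for _ in range(max_iter):
        comp_rot = np.dot(components, rotation_matrix)
        if method == 'varimax':
            tmp = comp_rot * ((comp_rot ** 2).sum(axis=0) / nrow)
        elif method == 'quartimax':
            tmp = 0
        # update the orthogonal rotation matrix from the SVD of the gradient
        u, s, v = np.linalg.svd(np.dot(components.T, comp_rot ** 3 - tmp))
        rotation_matrix = np.dot(u, v)
        var_new = np.sum(s)
        if var != 0 and var_new < var * (1 + tol):
            break
        var = var_new
    return np.dot(components, rotation_matrix)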
Example #8
for ii in range(3, 8):
    if ii < 4:
        skipnum = 10
    else:
        skipnum = 50
    solver_across = Eof(dat['across track velocity'][:, ::skipnum, :].T[:, :,
                                                                        ii])
    solver_along = Eof(dat['along track velocity'][:, ::skipnum, :].T[:, :,
                                                                      ii])

    figure(figsize=(10, 5))
    subplot(121)
    plot(squeeze(solver_across.eofs(neofs=1)),
         -dat.depth[::skipnum],
         label='mode 1: ' +
         str(int(solver_across.varianceFraction()[0].values * 100)) +
         '% of variance')
    plot(squeeze(solver_across.eofs()[1, :]),
         -dat.depth[::skipnum],
         label='mode 2: ' +
         str(int(solver_across.varianceFraction()[1].values * 100)) +
         '% of variance')
    ylabel('depth [m]')
    xlabel('along-stream velocity [m/s]')
    xlim([-1, 1])
    legend(loc=(0.2, 1.01))
    subplot(122)
    plot(squeeze(solver_along.eofs(neofs=1)),
         -dat.depth[::skipnum],
         label='mode 1: ' +
         str(int(solver_along.varianceFraction()[0].values * 100)) +
def main(args):
    #environmental constants  
    if platform.system() == 'Windows':
        in_dir='../examples/'
        out_dir='../regressors/'
        reg_dir='../regressors/'#${in_dir}'regresory_2013/'
        nc_gen=True
        pdf_gen=False
        plus = ''
    else:
        n_samples = int(os.environ['n_samples'])
        in_dir = os.environ['in_dir']
        #out_dir = os.environ['out_dir']
        reg_dir = os.environ['reg_dir']
        pdf_gen = os.environ['pdf_gen']
        nc_gen = os.environ['nc_gen']

    what_re = args.what_re
    vari = args.vari
    i_year = args.i_year
    s_year = args.s_year
    e_year = args.e_year
    in_file_name = args.in_file_name

    if args.verbose:
        print('dataset: ', what_re)
        print('variable: ', vari)
        print('initial year of dataset: ', i_year)
        print('initial year of analysis: ', s_year)
        print('end year of analysis: ', e_year)
        print('input filename: ', in_file_name)


    print('data opening')
    in_netcdf = in_dir + in_file_name
    print(in_netcdf)
    ds = xr.open_dataset(in_netcdf)
    #print(ds)
    lat_name = fce.get_coords_name(ds, 'latitude')
    lat = ds.coords[lat_name]
    nlat = lat.shape[0]

    lev_name = fce.get_coords_name(ds, 'pressure')
    if ds.coords[lev_name].attrs['units'] == 'Pa':
        lev =  ds.coords[lev_name]/100.
        ds[lev_name] = lev    
    else:
        lev = ds.coords[lev_name]

    
    n = ds.coords['time'].shape[0]
    #it may happen that the field is 3D (longitude is missing)
    try:
        lon_name = fce.get_coords_name(ds, 'longitude')
        lon = ds.coords[lon_name]
        nlon = lon.shape[0]
    except:
        nlon = 1

    #print nlat, nlev, n, nlon

    #zonal mean
    if nlon != 1:
        uwnd = ds[vari].mean(lon_name)
    else:
        uwnd = ds[vari]
      
    #equatorial average and level selection
    sel_dict = {lev_name: fce.coord_Between(lev,10,50), lat_name: fce.coord_Between(lat,-10,10)}    
    zm_u = uwnd.sel(**sel_dict).mean(lat_name)
    #period selection
    times = pd.date_range(str(s_year)+'-01-01', str(e_year)+'-12-31', name='time', freq = 'M')
    zm_u_sel = zm_u.sel(time = times, method='ffill') #nearest
    #remove seasonality
    climatology = zm_u_sel.groupby('time.month').mean('time')
    anomalies = zm_u_sel.groupby('time.month') - climatology

    #print anomalies
    #sys.exit()
    
    #additional constants
    npca = 30
    norm=2 #5
    norms=3 #5
    what_sp = '' # what solar proxy?

    print("regressors' openning")
    global reg#, reg_names, nr
    reg, reg_names, history = fce.configuration_ccmi(what_re, what_sp, norm, 'no_qbo' , i_year, s_year, e_year, reg_dir)
    nr = reg.shape[1]
    #print(anomalies) 
    #extracting of other variability by MLR
    stacked = anomalies.stack(allpoints = [lev_name])
    stacked = stacked.reset_coords(drop=True)
    resids = stacked.groupby('allpoints').apply(xr_regression)
    resids = resids.rename({'dim_0': 'time'})
    resids['time'] = times
    #EOF analysis
    solver = Eof(resids.T, weights=None) 
    #sys.exit()

    #coslat = np.cos(np.deg2rad(lat)).clip(0.,1.)
    #wgts = np.sqrt(coslat)[np.newaxis,...]   

    # grow the number of retained PCs until the first i variance fractions sum to more than 0.95
    for i in range(npca):
        var_eofs = solver.varianceFraction(neigs=i)
        #print var_eofs
        if np.sum(var_eofs) > 0.95:
            npca = i
            total_variance = np.sum(var_eofs)
            print(total_variance, ' % based on ', i, ' components')
            break

    var_eofs = solver.varianceFraction(neigs=npca)
    pcs = solver.pcs(npcs=npca, pcscaling=1)
    nte = solver.northTest(neigs=npca, vfscaled=True)

    subdir = './'
    if pdf_gen:
        fig = plt.figure(figsize=(11,8))
        ax1 = fig.add_subplot(111)
        ax1.set_title(str(npca)+' PCAs cover '+str(np.round(total_variance*100, 2))+'% of total variance')
        for i in range(npca):
                #plotting
                pcs[:,i].plot(linewidth = 2, ax = ax1, label = 'pca '+str(i+1))

        ax1.set_xlabel('time [years]')
        ax1.set_ylabel('QBO index')
        ax1.set_title('')
        ax1.legend(loc = 'best')
        plt.savefig(reg_dir+'qbo_'+what_re+'_pcas.pdf', bbox_inches='tight')
        plt.close(fig)    
                
    if nc_gen:
        #save to netcdf
        #print(pcs[:,0])
        for i in range(npca):
            pcs_ds = pcs[:,i].to_dataset(name = 'index')
            pcs_ds.to_netcdf(reg_dir+r'qbo_'+what_re+'_pc'+str(i+1)+pripona_nc)       
Example #10
print(chlorophyll.dims)
surfchl = chlorophyll[:, 14, :, :]
print(xr.DataArray(surfchl).shape)
chl_mean = surfchl.mean(dim='time')
print(xr.DataArray(chl_mean).shape)
anomaly = surfchl - chl_mean
print(xr.DataArray(anomaly).shape)
solver = Eof(xr.DataArray(anomaly))
eof1 = solver.eofsAsCorrelation(neofs=1)
pc1 = solver.pcs(npcs=1, pcscaling=1)
plt.pcolormesh(lon, lat, eof1[0], cmap=plt.cm.RdBu_r)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('EOF1 expressed as Correlation')
cbar = plt.colorbar()
cbar.set_label('Correlation Coefficient', rotation=270)
plt.show()
plt.plot(timearray, pc1[:, 0])
plt.xlabel('Year')
plt.ylabel('Normalized Units')
plt.title('PC1 Time Series')
plt.show()
vF1 = solver.varianceFraction(neigs=6)
percentarray = vF1 * 100
array1 = [1, 2, 3, 4, 5, 6]
plt.bar(array1, percentarray)
plt.title('Scree Plot')
plt.xlabel('Mode')
plt.ylabel('Percent of Variance Explained')
plt.show()
Example #11
# --- read netcdf file
dset = xr.open_dataset('asstdt_pacific.nc')

# --- select djf months
sst = dset['sst'].sel(time=np.in1d(dset['time.month'], [1, 2, 12]))

# --- square-root of cosine of latitude weights
coslat = np.cos(np.deg2rad(sst.coords['lat'].values))
wgts = np.sqrt(coslat)[..., np.newaxis]
# --- eof solver
solver = Eof(sst, weights=wgts)
# --- eof results
eofs = solver.eofsAsCorrelation(neofs=2)
pcs = solver.pcs(npcs=2, pcscaling=1)
variance_fractions = solver.varianceFraction()
north_test = solver.northTest(vfscaled=True)


# --- spatial patterns
fig, ax = plot.subplots(axwidth=5, nrows=2, tight=True, proj='pcarree',
                        proj_kw={'lon_0': 180})
# --- format options
ax.format(land=False, coast=True, innerborders=True, borders=True,
          large='15px', labels=False,
          latlim=(31, -31), lonlim=(119, 291),
          geogridlinewidth=0,
          abcloc='ul')
# a) first EOF mode
map1 = ax[0].contourf(dset['lon'], dset['lat'], eofs[0, :, :],
                      levels=np.arange(-0.5, 0.6, 0.1), cmap='Div', extend='both')
Example #12
# center=True removes the time mean along the first axis before the analysis;
# center=False assumes the data are already anomalies.
'''
If *True*, the mean along the first axis of *dataset* (the
time-mean) will be removed prior to analysis. If *False*,
the mean along the first axis will not be removed. Defaults
to *True* (mean is removed).
The covariance interpretation relies on the input data being
anomaly data with a time-mean of 0. Therefore this option
should usually be set to *True*. Setting this option to
*True* has the useful side effect of propagating missing
values along the time dimension, ensuring that a solution
can be found even if missing values occur in different
locations at different times.
'''
lambdas = solver.eigenvalues()
vf = solver.varianceFraction()
Nerror = solver.northTest(vfscaled=True)
pcs = solver.pcs()  #(time, mode)
eofs = solver.eofsAsCovariance()
'''
plt.figure()
plt.subplot(3,2,1)
pcs[:, 0].plot()#color='b', linewidth=2)
ax = plt.gca()
ax.axhline(0, color='k')
ax.set_xlabel('Year')
ax.set_ylabel('PC1 amplitude')
plt.grid()
plt.subplot(3,2,2)
pcs[:, 1].plot()
ax = plt.gca()
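The docstring quoted above describes the center option of Eof; a minimal sketch of the equivalence it implies (names and sizes are illustrative): letting the solver remove the time mean gives the same solution as passing precomputed anomalies with center=False.

import numpy as np
import pandas as pd
import xarray as xr
from eofs.xarray import Eof

field = xr.DataArray(
    np.random.randn(60, 15, 20),
    dims=('time', 'lat', 'lon'),
    coords={'time': pd.date_range('2000-01-01', periods=60, freq='MS')},
)

solver_raw = Eof(field, center=True)        # the solver removes the time mean itself
anomalies = field - field.mean(dim='time')
solver_anom = Eof(anomalies, center=False)  # the data are already anomalies

# identical variance fractions (and the same EOFs/PCs up to sign)
np.testing.assert_allclose(solver_raw.varianceFraction().values,
                           solver_anom.varianceFraction().values)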
Example #13
psl = psl.sel(time=slice(start, end))

psl_obs = xr.open_dataset('processed_data/remap-woa09_psl_Amon_ERA-Int.nc')[
    'psl']  #units Pa
psl_obs = psl_obs.sel(time=slice(start, end))

# In[246]:

psl_sof20s = psl.sel(lat=slice(-90, -20))
psl_sof20s = psl_sof20s - psl_sof20s.mean(dim='time')
coslat = np.cos(np.deg2rad(psl_sof20s.coords['lat'].values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
#psl_sof20s
solver = Eof(psl_sof20s, weights=wgts)
sh_eof = solver.eofsAsCorrelation(neofs=1)
var_s = solver.varianceFraction(neigs=1)

psl_sof20s_obs = psl_obs.sel(lat=slice(-90, -20))
psl_sof20s_obs = psl_sof20s_obs - psl_sof20s_obs.mean(dim='time')
#psl_sof20s
solver_obs = Eof(psl_sof20s_obs, weights=wgts)
sh_eof_obs = solver_obs.eofsAsCorrelation(neofs=1)
var_s_obs = solver_obs.varianceFraction(neigs=1)

# In[247]:

import iris
import iris.coord_categorisation

cube = iris.load_cube(
    'processed_data/remap-woa09_psl_Amon_CanESM5_historical_r1i1p1f1_gn_185001-201412.nc'
from eofs.xarray import Eof
import datetime
import os

# Read preprocessed data.
DATA_FILE = "/LFASGI/sandroal/data_sets/GIMMS/ppdata_ndvi.nc" 
DS = xr.open_dataset(DATA_FILE)

# Create an EOF solver to do the EOF analysis. Memory intensive operation.
solver = Eof(DS.ndvi)

# Retrieve EOFs, principal component time series, fraction of explained 
# variance, and eigenvalues as xarray DataArray objects for all modes.
EOFs = solver.eofs() 
PCs = solver.pcs()  
FRACs = solver.varianceFraction() 
EIGs = solver.eigenvalues() 

# Attributes for xarray DataSet objects.
attrs = {}
attrs["Description"] = "Empirical orthogonal functions to NDVI (GIMMS) " + \
                       "in its original temporal and spatial resolutions"
attrs["Build"] = "By Alex Araujo"
attrs["Date"] = datetime.datetime.now().strftime("%B %d, %Y; %Hh:%Mmin:%Ss")
attrs["Source"] = os.path.abspath(__file__)

# Set these attributes to results. Must transform from xarray DataArray to 
# DataSets before exporting results as netcdf files.
DAs = [EOFs, PCs, FRACs, EIGs]
names = ["eofs", "pcs", "fracs", "eigs"]
files = ["ppdata_ndvi_eofs_eofs.nc", 
areas = r**2 * coslats * dlats * dlons

Q_s = Q_s[:, :-1, :-1]
Qr = Qr[:, :-1, :-1]
sst = sst[:, :-1, :-1]
tendsst = tendsst[:, :-1, :-1]

tot_area = np.sum(areas)

weights = areas / tot_area

#Calculate EOFs for SST, Qs and Qo. Should weight by area?
solver = Eof(sst, weights=weights)
sst_eof = solver.eofs(neofs=3, eofscaling=2)

sst_eof_varfracs = solver.varianceFraction()

solver = Eof(Qr, weights=weights)
Qo_eof = solver.eofs(neofs=3, eofscaling=2)
Qo_pc = solver.pcs(npcs=3, pcscaling=2)

Qo_eof_varfracs = solver.varianceFraction()

Qo_rec = solver.reconstructedField(5)

Qo_rec_var = Qo_rec.var(dim='time')

#get projection (pseudo-PCs) associated with Qo EOFs
# Qo_eof_projsst = solver.projectField(sst, neofs=3)
# Qo_eof_projQo = solver.projectField(Qr, neofs=3)
start5 = time.time()

os.chdir("/home/ubuntu")
lon = 180
lat = 90
dim = lon * lat
months = 24

data = np.resize(x1, [dim, months])

solver = Eof(xr.DataArray(anomalies.data, dims=['time', 'lat', 'lon']))

pcs = solver.pcs(npcs=3, pcscaling=1)
eofs = solver.eofs(neofs=5, eofscaling=1)

variance_fractions = solver.varianceFraction()
variance_fractions = solver.varianceFraction(neigs=3)
print(variance_fractions)

myFile1 = open('PC1.csv', 'w')
with myFile1:
    writer = csv.writer(myFile1)
    writer.writerows(eofs[0, :, :].data)

myFile2 = open('PC2.csv', 'w')
with myFile2:
    writer = csv.writer(myFile2)
    writer.writerows(eofs[1, :, :].data)

myFile3 = open('PC3.csv', 'w')
with myFile3:
def eof_orca_latlon_box(run, var, modes, lon_bnds, lat_bnds, pathfile, plot,
                        time, eoftype):

    if (var == 'temp'):
        key = 'votemper'
        key1 = "votemper"
    elif (var == 'sal'):
        key = 'vosaline'
        key1 = "vosaline"
    elif (var == 'MLD'):
        key = 'somxl010'
        key1 = "somxl010"

    # read data
    ds = xr.open_dataset(pathfile)
    #ds["time_counter"] = ds['time_counter']+(np.datetime64('0002-01-01')-np.datetime64('0001-01-01'))

    if time == 'comparison':
        ds = ds.sel(time_counter=slice('1958-01-01', '2006-12-31'))

    # cut box for EOF at surface
    if var == 'MLD':
        data = ds[key].sel(lon=slice(lon_bnds[0], lon_bnds[1]),
                           lat=slice(lat_bnds[0], lat_bnds[1]))
        #data = cut_latlon_box(ds[key][:,:,:],ds.lon,ds.lat,
        # lon_bnds,lat_bnds)
    else:
        data = ds[key][:, 0, :, :].sel(lon=slice(lon_bnds[0], lon_bnds[1]),
                                       lat=slice(lat_bnds[0], lat_bnds[1]))
        #data = cut_latlon_box(ds[key][:,0,:,:],ds.lon,ds.lat,
        # lon_bnds,lat_bnds)
    data = data.to_dataset()
    # detrend data
    data[key1] = (['time_counter', 'lat', 'lon'],
                  signal.detrend(data[key].fillna(0), axis=0, type='linear'))

    #data=data.where(data!=0)

    # remove seasonal cycle and drop unnecessary coordinates
    if 'time_centered' in list(data.coords):
        data = deseason_month(data).drop('month').drop(
            'time_centered')  # somehow pca doesn't work otherwise
    else:
        data = deseason_month(data).drop(
            'month')  # somehow pca doesn't work otherwise

    # set 0 values back to nan
    data = data.where(data != 0)

    # EOF analysis
    #Square-root of cosine of latitude weights are applied before the computation of EOFs.
    coslat = np.cos(np.deg2rad(data['lat'].values))
    coslat, _ = np.meshgrid(coslat, np.arange(0, len(data['lon'])))
    wgts = np.sqrt(coslat)
    solver = Eof(data[key], weights=wgts.transpose())
    pcs = solver.pcs(npcs=modes, pcscaling=1)
    if eoftype == 'correlation':
        eof = solver.eofsAsCorrelation(neofs=modes)
    elif eoftype == 'covariance':
        eof = solver.eofsAsCovariance(neofs=modes)
    else:
        eof = solver.eofs(neofs=modes)
    varfr = solver.varianceFraction(neigs=4)
    print(varfr)

    #----------- Plotting --------------------
    plt.close("all")
    if plot == 1:
        for i in np.arange(0, modes):
            fig = plt.figure(figsize=(8, 2))
            ax1 = fig.add_axes([0.1, 0.1, 0.3, 0.9],
                               projection=ccrs.PlateCarree())  # main axes
            ax1.set_extent(
                (lon_bnds[0], lon_bnds[1], lat_bnds[0], lat_bnds[1]))
            # discrete colormap
            cmap = plt.get_cmap('RdYlBu',
                                len(np.arange(10, 30)) -
                                1)  #inferno similar to cmo thermal
            eof[i, :, :].plot(ax=ax1,
                              cbar_kwargs={'label': 'Correlation'},
                              transform=ccrs.PlateCarree(),
                              x='lon',
                              y='lat',
                              add_colorbar=True,
                              cmap=cmap)
            gl = map_stuff(ax1)
            gl.xlocator = mticker.FixedLocator([100, 110, 120])
            gl.ylocator = mticker.FixedLocator(np.arange(-35, -10, 5))
            plt.text(116,
                     -24,
                     str(np.round(varfr[i].values, decimals=2)),
                     horizontalalignment='center',
                     verticalalignment='center',
                     transform=ccrs.PlateCarree(),
                     fontsize=8)

            ax2 = fig.add_axes([0.5, 0.1, 0.55, 0.9])  # main axes
            plt.plot(pcs.time_counter,
                     pcs[:, i].values,
                     linewidth=0.1,
                     color='k')
            anomaly(ax2, pcs.time_counter.values, pcs.values[:, i], [0, 0])
            ax2.set_xlim(
                [pcs.time_counter[0].values, pcs.time_counter[-1].values])
            plt.savefig(pathplots + 'eof_as' + eoftype + '_mode' + str(i) +
                        '_' + time + '_' + run + '_' + var + '.png',
                        dpi=300,
                        bbox_inches='tight',
                        pad_inches=0.1)
            plt.show()
    #----------------------------------------------

    return pcs, eof, varfr
for la in range(0,len(lat_obs)):
    for lo in range(0,len(lon_obs)):
        valid = ~np.isnan(sst_obs[:,la,lo])
        if (valid.any()==True):
            sst_obs[:,la,lo] = signal.detrend(sst_obs[:,la,lo], axis=0, \
                                               type='linear')
        elif (valid.all()==False):
            sst_obs[:,la,lo] = np.nan
'''

# EOF for model
coslat_mdl = np.cos(np.deg2rad(sst_mdl.coords['lat'].values))
wgts_mdl = np.sqrt(coslat_mdl)[..., np.newaxis]
solver_mdl = Eof(sst_mdl, weights=wgts_mdl, center=True)
lambdas_mdl=solver_mdl.eigenvalues()
vf_mdl = solver_mdl.varianceFraction()
Nerror_mdl = solver_mdl.northTest(vfscaled=True)
pcs_mdl = solver_mdl.pcs() #(time, mode)
eofs_mdl = solver_mdl.eofs()
# EOF for obs
coslat_obs = np.cos(np.deg2rad(sst_obs.coords['lat'].values))
wgts_obs = np.sqrt(coslat_obs)[..., np.newaxis]
solver_obs = Eof(sst_obs, weights=wgts_obs, center=True)
lambdas_obs=solver_obs.eigenvalues()
vf_obs = solver_obs.varianceFraction()
Nerror_obs = solver_obs.northTest(vfscaled=True)
pcs_obs = solver_obs.pcs() #(time, mode)
eofs_obs = solver_obs.eofs()


def calcEOF(xrdata, data_var, w, wei=True):
    """
    input:
        xrdata: xarray Dataset
        data_var: string. Name of the variable to use for the EOF.
        w: string. Name of the coordinate used to build the weights, e.g. 'latitude'.
        wei: bool. If True, weight the data by sqrt(cos(latitude)) before the analysis.

        use as:
            solver, eof1, var1 = calcEOF(xrdata, 'data_var', 'latitude')
    """
    xrdata = xrdata - xrdata[data_var].mean(dim="time")

    # Testing if we can select data from level, lat and time
    try:
        xrdata = xrdata.sel(level=1000,
                            latitude=slice(90, 20),
                            time=slice("1979-01-01", "2000-12-31"))
        print(
            'Data selection OK on first try. Level, lat and time slice done.')
    except ValueError:
        try:
            print('valueError: Trying next')
            xrdata = xrdata.sel(level=1000,
                                lat=slice(90, 20),
                                time=slice("1979-01-01", "2000-12-31"))
            print(
                'Data selection OK on second try. Level, lat and time slice done.'
            )
        except ValueError:
            try:
                print('valueError: Trying next')
                xrdata = xrdata.sel(latitude=slice(90, 20),
                                    time=slice("1979-01-01", "2000-12-31"))
                print('Data selection OK on third try. No level cut')
            except ValueError:
                try:
                    print('valueError: Trying next')
                    xrdata = xrdata.sel(time=slice("1979-01-01", "2000-12-31"))
                    print('Data selection OK on fourth try. Only time slice.')
                except:
                    raise TypeError(' Data out of limits')

    xrdata = (xrdata.groupby('time.month') -
              xrdata[data_var].groupby('time.month').mean())
    #  To ensure equal area weighting for the covariance matrix,
    # the gridded data is weighted by the square root of the cosine of
    # latitude. - NOAA

    if wei == True:
        coslat = np.cos(np.deg2rad(xrdata.coords[w].values)).clip(0., 1.)
        # np.newaxis adds a trailing dimension so the 1-D latitude weights
        # broadcast across longitude: shape (nlat,) becomes (nlat, 1)
        wgts = np.sqrt(coslat)[..., np.newaxis]
        # The EOF analysis is handled by a solver class, and the EOF solution
        # is computed when the solver class is created. Method calls are then used
        # to retrieve the quantities of interest from the solver class.
        # center = False do not remove mean from data
        # solver = Eof(m_anomalie.hgt, weights=wgts, center=False)
        """
        # solver.eofAsCovariance Returns the EOFs expressed as the covariance between each PC and the input

        # data set at each point in space. they are not actually the EOFs. They tell you how each point in space

        # varies like the given mode. The eofs method provides the raw EOFs (eigenvectors of the covariance

        # matrix) which are the spatial patterns the PCs are the coefficeints of.

        # “The covariance matrix is used for the EOF analysis.” - NOAA """

        solver = Eof(xrdata[data_var], weights=wgts)
    else:
        solver = Eof(xrdata[data_var])
    # solver = Eof(s_anomalie.hgt, weights=wgts, center=False)
    # Retrieve the leading EOF, expressed as the covariance between the leading PC
    # time series and the input SLP anomalies at each grid point.
    eof1 = solver.eofsAsCovariance(pcscaling=1)
    var1 = solver.varianceFraction().sel(mode=0)

    return solver, eof1, var1
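A hypothetical call of calcEOF (the file, variable, and coordinate names are assumptions, not from the original script):

import xarray as xr

ds = xr.open_dataset('hgt_monthly.nc')  # assumed dataset with 'hgt', a 'latitude' coord, and monthly times
solver, eof1, var1 = calcEOF(ds, 'hgt', 'latitude', wei=True)
print('Leading mode explains {:.1f}% of the variance'.format(float(var1) * 100))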
def main(args):
    #environmental constants
    if platform.system() == 'Windows':
        in_dir = '../examples/'
        out_dir = '../regressors/'
        reg_dir = '../regressors/'  #${in_dir}'regresory_2013/'
        nc_gen = True
        pdf_gen = False
        plus = ''
    else:
        n_samples = int(os.environ['n_samples'])
        in_dir = os.environ['in_dir']
        #out_dir = os.environ['out_dir']
        reg_dir = os.environ['reg_dir']
        pdf_gen = os.environ['pdf_gen']
        nc_gen = os.environ['nc_gen']

    what_re = args.what_re
    vari = args.vari
    i_year = args.i_year
    s_year = args.s_year
    e_year = args.e_year
    in_file_name = args.in_file_name

    if args.verbose:
        print('dataset: ', what_re)
        print('variable: ', vari)
        print('initial year of dataset: ', i_year)
        print('initial year of analysis: ', s_year)
        print('end year of analysis: ', e_year)
        print('input filename: ', in_file_name)

    print('data opening')
    in_netcdf = in_dir + in_file_name
    print(in_netcdf)
    ds = xr.open_dataset(in_netcdf)
    #print(ds)
    lat_name = fce.get_coords_name(ds, 'latitude')
    lat = ds.coords[lat_name]
    nlat = lat.shape[0]

    lev_name = fce.get_coords_name(ds, 'pressure')
    if ds.coords[lev_name].attrs['units'] == 'Pa':
        lev = ds.coords[lev_name] / 100.
        ds[lev_name] = lev
    else:
        lev = ds.coords[lev_name]

    n = ds.coords['time'].shape[0]
    #it may happen that the field is 3D (longitude is missing)
    try:
        lon_name = fce.get_coords_name(ds, 'longitude')
        lon = ds.coords[lon_name]
        nlon = lon.shape[0]
    except:
        nlon = 1

    #print nlat, nlev, n, nlon

    #zonal mean
    if nlon != 1:
        uwnd = ds[vari].mean(lon_name)
    else:
        uwnd = ds[vari]

    #equatorial average and level selection
    sel_dict = {
        lev_name: fce.coord_Between(lev, 10, 50),
        lat_name: fce.coord_Between(lat, -10, 10)
    }
    zm_u = uwnd.sel(**sel_dict).mean(lat_name)
    #period selection
    times = pd.date_range(str(s_year) + '-01-01',
                          str(e_year) + '-12-31',
                          name='time',
                          freq='M')
    zm_u_sel = zm_u.sel(time=times, method='ffill')  #nearest
    #remove seasonality
    climatology = zm_u_sel.groupby('time.month').mean('time')
    anomalies = zm_u_sel.groupby('time.month') - climatology

    #print anomalies
    #sys.exit()

    #additional constants
    npca = 30
    norm = 2  #5
    norms = 3  #5
    what_sp = ''  # what solar proxy?

    print("regressors' openning")
    global reg  #, reg_names, nr
    reg, reg_names, history = fce.configuration_ccmi(what_re, what_sp, norm,
                                                     'no_qbo', i_year, s_year,
                                                     e_year, reg_dir)
    nr = reg.shape[1]
    #print(anomalies)
    #extracting of other variability by MLR
    stacked = anomalies.stack(allpoints=[lev_name])
    stacked = stacked.reset_coords(drop=True)
    resids = stacked.groupby('allpoints').apply(xr_regression)
    resids = resids.rename({'dim_0': 'time'})
    resids['time'] = times
    #EOF analysis
    solver = Eof(resids.T, weights=None)
    #sys.exit()

    #coslat = np.cos(np.deg2rad(lat)).clip(0.,1.)
    #wgts = np.sqrt(coslat)[np.newaxis,...]

    # grow the number of retained PCs until the first i variance fractions sum to more than 0.95
    for i in range(npca):
        var_eofs = solver.varianceFraction(neigs=i)
        #print var_eofs
        if np.sum(var_eofs) > 0.95:
            npca = i
            total_variance = np.sum(var_eofs)
            print(total_variance, ' % based on ', i, ' components')
            break

    var_eofs = solver.varianceFraction(neigs=npca)
    pcs = solver.pcs(npcs=npca, pcscaling=1)
    nte = solver.northTest(neigs=npca, vfscaled=True)

    subdir = './'
    if pdf_gen:
        fig = plt.figure(figsize=(11, 8))
        ax1 = fig.add_subplot(111)
        ax1.set_title(
            str(npca) + ' PCAs cover ' +
            str(np.round(total_variance * 100, 2)) + '% of total variance')
        for i in range(npca):
            #plotting
            pcs[:, i].plot(linewidth=2, ax=ax1, label='pca ' + str(i + 1))

        ax1.set_xlabel('time [years]')
        ax1.set_ylabel('QBO index')
        ax1.set_title('')
        ax1.legend(loc='best')
        plt.savefig(reg_dir + 'qbo_' + what_re + '_pcas.pdf',
                    bbox_inches='tight')
        plt.close(fig)

    if nc_gen:
        #save to netcdf
        #print(pcs[:,0])
        for i in range(npca):
            pcs_ds = pcs[:, i].to_dataset(name='index')
            pcs_ds.to_netcdf(reg_dir + r'qbo_' + what_re + '_pc' + str(i + 1) +
                             pripona_nc)
Example #21
def main():
    ens = sys.argv[1]
    sYear = sys.argv[2]
    eYear = sys.argv[3]
    if int(sYear) < 1920:
        raise ValueError("Starting year must be 1920 or later.")
    if int(eYear) > 2100:
        raise ValueError("End year must be 2100 or earlier.")
    print("Computing NPGO for ensemble number " + ens + "...")
    filepath = ('/glade/scratch/rbrady/EBUS_BGC_Variability/' +
                'global_residuals/SST/remapped/remapped.SST.' + ens +
                '.192001-210012.nc')
    print("Global residuals loaded...")
    ds = xr.open_dataset(filepath)
    ds = ds['SST'].squeeze()
    # Make time dimension readable through xarray.
    ds['time'] = pd.date_range('1920-01', '2101-01', freq='M')
    # Reduce to time period of interest.
    ds = ds.sel(time=slice(sYear + '-01', eYear + '-12'))
    # Slice down to Northeast Pacific domain.
    ds = ds.sel(lat=slice(25, 62), lon=slice(180, 250))
    # Take annual JFM means.
    month = ds['time.month']
    JFM = (month <= 3)
    ds_winter = ds.where(JFM).resample('A', 'time')
    # Compute EOF
    coslat = np.cos(np.deg2rad(ds_winter.lat.values))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(ds_winter, weights=wgts, center=False)
    print("NPGO computed.")
    eof = solver.eofsAsCorrelation(neofs=2)
    variance = solver.varianceFraction(neigs=2)
    # Reconstruct the monthly index of SSTa by projecting
    # these values onto the annual PC timeseries.
    pseudo_pc = solver.projectField(ds, neofs=2, eofscaling=1)
    # Set up as dataset.
    ds = eof.to_dataset()
    ds['pc'] = pseudo_pc
    ds['variance_fraction'] = variance
    ds = ds.rename({'eofs': 'eof'})
    ds = ds.sel(mode=1)
    # Invert to the proper values for the bullseye.
    if ds.sel(lat=45.5, lon=210).eof < 0:
        pass
    else:
        ds['eof'] = ds['eof'] * -1
        ds['pc'] = ds['pc'] * -1
    # Change some attributes for the variables.
    ds['eof'].attrs['long_name'] = 'Correlation between PC and JFM SSTa'
    ds['pc'].attrs['long_name'] = 'Principal component for NPGO'
    # Add a description of methods for clarity.
    ds.attrs[
        'description'] = 'Second mode of JFM SSTa variability over 25-62N and 180-110W.'
    ds.attrs[
        'anomalies'] = 'Anomalies were computed by removing the ensemble mean at each grid cell.'
    ds.attrs['weighting'] = (
        'The native grid was regridded to a standard 1deg x 1deg (180x360) grid.'
        + 'Weighting was computed via the sqrt of the cosine of latitude.')
    print("Saving to netCDF...")
    ds.to_netcdf('/glade/p/work/rbrady/NPGO/NPGO.' + ens + '.' + str(sYear) +
                 '-' + str(eYear) + '.nc')