Example #1
 def processInputCrossSection(self, request: TaskRequest, node: OpNode,
                              inputDset: EDASDataset) -> EDASDataset:
     nModes = int(node.getParm("modes", 16))
     # note: bool("false") is True, so parse the flag string explicitly
     center = str(node.getParm("center", "false")).lower() == "true"
     merged_input_data, info = self.get_input_array(inputDset)
     shapes = info['shapes']
     slicers = info['slicers']
     solver = Eof(merged_input_data, center=center)
     results = []
     for iMode, eofs_result in enumerate(solver.eofs(neofs=nModes)):
         for iVar, eofs_data in enumerate(
                 self.getResults(eofs_result, slicers, shapes)):
             input_var = inputDset.inputs[iVar]  # avoid shadowing builtin input()
             results.append(
                 EDASArray("-".join(["eof", str(iMode), input_var.name]),
                           input_var.domId, eofs_data))
     pcs_result = solver.pcs(npcs=nModes)
     pcs = EDASArray(
         "pcs[" + inputDset.id + "]", inputDset.inputs[0].domId,
         EDASArray.cleanupCoords(pcs_result, {
             "mode": "m",
             "pc": "m"
         }).transpose())
     results.append(pcs)
     fracs = solver.varianceFraction(neigs=nModes)
     pves = [str(round(float(frac * 100.), 1)) + '%' for frac in fracs]
     for result in results:
         result["pves"] = str(pves)
     return EDASDataset.init(self.renameResults(results, node),
                             inputDset.attrs)
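
The "pves" strings above come straight from varianceFraction; as a standalone
sketch (solver here is any fitted eofs solver):

fracs = solver.varianceFraction(neigs=4)
pves = [f"{float(frac) * 100:.1f}%" for frac in fracs]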
Example #2
def get_eofs(x):
    import numpy as np
    import xarray
    from eofs.xarray import Eof
    from matplotlib import pyplot as plt

    coslat = np.cos(np.deg2rad(x.lat)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]

    from eddof import get_eddof
    DF = np.empty(np.shape(x[0, :, :]))
    for i in range(0, len(x.lat)):
        for j in range(0, len(x.lon)):
            DF[i, j] = get_eddof(x[:, i, j].values)
    edof = np.mean(DF)
    print(edof)
    solver = Eof(x, weights=wgts, ddof=edof)

    var = solver.varianceFraction()
    plt.figure(1)
    plt.bar(np.arange(0, len(var), 1), var * 100)
    plt.show()
    plt.close()

    n = int(input('How many PCs to extract: '))
    eof = solver.eofs(neofs=n, eofscaling=2)
    pc = solver.pcs(npcs=n, pcscaling=1)
    vf = var[:n]

    EOFs = [eof, pc, vf]
    return EOFs
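
The sqrt(cos(latitude)) weighting used above generalizes beyond this function;
a standalone sketch with a synthetic latitude coordinate:

import numpy as np
import xarray as xr

lat = xr.DataArray(np.linspace(-90., 90., 73), dims='lat')
coslat = np.cos(np.deg2rad(lat)).clip(0., 1.)
wgts = np.sqrt(coslat).values[..., np.newaxis]  # (nlat, 1); broadcasts over lon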
Example #3
import numpy as np
from numpy import fft


def PDO(dat1, dat2):
    solver = Eof(dat1)
    pc1 = solver.pcs(npcs=1, pcscaling=1)
    sigs = solver.eigenvalues()
    eofs = solver.eofs()
    eof1 = eofs[0]
    # normalise sigs
    sigs1 = sigs / sigs.sum()
    # not used eof1 = solver.eofsAsCorrelation(neofs=1)
    #    pcs=solver.pcs()

    # filter first pdf
    Nt = dat1.time.size
    fp = fft.fft(pc1[:, 0])
    x = fft.fftfreq(Nt, 1 / 12.)  # cycles per year
    # lowpass filter at 0.1 per year
    i = abs(x) <= .1
    fp_fil = fp * i
    #plt.plot(x,abs(fp))
    #plt.plot(x,abs(fp_fil))
    pfil = fft.ifft(fp_fil)
    #
    # retain xarray coords by adding the filtered values onto a zeroed copy
    pc1_fil = pc1[:, 0] * 0 + np.real(pfil)
    #plt.plot(pc1_fil)
    #print(pc1)
    print(pc1_fil)
    tmp = np.imag(pfil)
    print(tmp.max())
    # correlate with sst field

    astd, bstd, abstd, bhat = regresst(pc1_fil, dat2)  # regresst: external helper
    r = abstd / (astd * bstd)
    slope = r * bstd

    return sigs1, eof1, slope, pc1, pc1_fil, x, fp_fil
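
The same low-pass construction as a standalone sketch, applied to a synthetic
monthly series:

import numpy as np
from numpy import fft

t = np.arange(240)                     # 20 years of monthly samples
y = np.sin(2 * np.pi * t / 240) + 0.5 * np.random.randn(240)
fy = fft.fft(y)
freqs = fft.fftfreq(t.size, 1 / 12.)   # cycles per year
fy[np.abs(freqs) > 0.1] = 0            # keep periods longer than ten years
y_low = np.real(fft.ifft(fy))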
Example #4
def test_eof_values(shape, n_modes, weight, wrap):
    """Test values relative to Eof package"""
    data = example_da(shape, wrap=wrap)
    lat_dim = f"dim_{len(shape)-1}"
    xeof.core.LAT_NAME = lat_dim
    sensor_dims = [f"dim_{i}" for i in range(1, len(shape))]

    if weight == "none":
        weights = None
    elif weight == "sqrt_cos_lat":
        weights = np.cos(data[lat_dim] * np.pi / 180)**0.5
    elif weight == "random":
        weights = data.isel(time=0).copy()
        weight = weights.compute()

    res = eof(
        data,
        sensor_dims=sensor_dims,
        sample_dim="time",
        weight=weight,
        n_modes=n_modes,
        norm_PCs=False,
    )

    Eof_solver = Eof(data, weights=weights, center=False)
    ver_pcs = Eof_solver.pcs(pcscaling=0, npcs=n_modes)
    ver_eofs = Eof_solver.eofs(eofscaling=0, neofs=n_modes)
    ver_EV = Eof_solver.varianceFraction(neigs=n_modes)

    npt.assert_allclose(abs(res["pc"]), abs(ver_pcs))
    npt.assert_allclose(abs(res["eof"]), abs(ver_eofs))
    npt.assert_allclose(res["explained_var"], ver_EV)
Example #5
 def Decompose(self):
     '''perform EOF decomposition'''
     solver = Eof(self.rawdata)
     var_frac = solver.varianceFraction()
     cumvar = np.cumsum(var_frac.values)
     self.npcs = int(np.where(cumvar >= self.prop_variance)[0].min()) + 1
     self.pc = solver.pcs(npcs=self.npcs)  # time series of the retained PCs
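
A toy check of this mode-count rule, with an assumed threshold of 0.85:

import numpy as np

var_frac = np.array([0.5, 0.3, 0.1, 0.05, 0.05])
cumvar = np.cumsum(var_frac)                        # [0.5, 0.8, 0.9, 0.95, 1.0]
npcs = int(np.where(cumvar >= 0.85)[0].min()) + 1   # three modes reach 85%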
Example #6
def EOF_SST_analysis(xa, weights, neofs=1, npcs=1, fn=None):
    """ Empirical Orthogonal Function of SST(t,x,y) field """
    assert isinstance(xa, xr.DataArray)
    assert isinstance(weights, xr.DataArray)
    assert 'time' in xa.dims
    assert np.shape(xa[0, :, :]) == np.shape(weights)

    # anomalies by removing time mean
    xa = xa - xa.mean(dim='time')
    # Retrieve the leading EOF, expressed as the covariance between the leading PC
    # time series and the input xa anomalies at each grid point.
    solver = Eof(xa, weights=weights)
    eofs = solver.eofsAsCovariance(neofs=neofs)
    pcs = solver.pcs(npcs=npcs, pcscaling=1)
    if fn is not None:
        xr.merge([eofs, pcs]).to_netcdf(fn)
    return eofs, pcs
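
A hypothetical usage sketch with synthetic inputs (sizes and names are
illustrative, not from the original source):

import numpy as np
import pandas as pd
import xarray as xr

time = pd.date_range('2000-01-01', periods=120, freq='MS')
lat = np.linspace(-60., 60., 25)
lon = np.linspace(0., 355., 72)
sst = xr.DataArray(np.random.randn(120, 25, 72),
                   coords={'time': time, 'lat': lat, 'lon': lon},
                   dims=['time', 'lat', 'lon'])
wgts = xr.DataArray(np.sqrt(np.cos(np.deg2rad(lat)))[:, np.newaxis]
                    * np.ones((25, 72)),
                    coords={'lat': lat, 'lon': lon}, dims=['lat', 'lon'])
eofs, pcs = EOF_SST_analysis(sst, wgts, neofs=2, npcs=2)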
Example #7
    def EOF_SST_analysis(self, xa, weights, n=1, fn=None):
        """ Empirical Orthogonal Function analysis of SST(t,x,y) field; from `SST.py` """
        assert isinstance(xa, xr.DataArray)
        assert isinstance(weights, xr.DataArray)
        assert 'time' in xa.dims
        assert np.shape(xa[0,:,:])==np.shape(weights)

        # anomalies by removing time mean
        xa = xa - xa.mean(dim='time')
        # Retrieve the leading EOF, expressed as the covariance between the leading PC
        # time series and the input xa anomalies at each grid point.
        solver = Eof(xa, weights=weights)
        eofs = solver.eofsAsCovariance(neofs=n)
        pcs  = solver.pcs(npcs=n, pcscaling=1)
        eigs = solver.eigenvalues(neigs=n)
        varF = solver.varianceFraction(neigs=n)
        ds = xr.merge([eofs, pcs, eigs, varF])
        if fn is not None:
            ds.to_netcdf(fn)
        return ds
Example #8
def plot_pca_analysis(ds, fig_output_path, title=''):
    print(title)

    nbpcs = 3

    solver = Eof(ds.dropna(dim="time", how="all"))
    pcas = solver.pcs(npcs=nbpcs, pcscaling=1)
    eofs = solver.eofs(neofs=nbpcs, eofscaling=1)

    fig, axes = plt.subplots(5, 2, figsize=(20, 20))
    fig.suptitle(title, fontsize=12)

    pcas.plot.line(ax=axes[0, 0], x='time')

    pcas.resample(time='M').mean('time').plot.line(ax=axes[1, 0], x='time')
    axes[1, 0].set_title('Monthly mean')

    pcas.resample(time='Y').mean('time').plot.line(ax=axes[2, 0], x='time')
    axes[2, 0].set_title('Annual mean')

    pcas.groupby('time.month').mean('time').plot.line(ax=axes[3, 0], x='month')
    axes[3, 0].set_title('By Month')

    pcas.groupby('time.hour').mean('time').plot.line(ax=axes[4, 0], x='hour')
    axes[4, 0].set_title('By Hour')

    for pc in range(nbpcs):
        # eofs.isel(mode=pc).plot(ax=axes[pc, 1])
        eofs.to_dataframe().unstack().T.loc[:, pc].plot.bar(ax=axes[pc, 1])

    solver.varianceFraction().isel(mode=slice(0, nbpcs)).plot(ax=axes[3, 1])

    plt.tight_layout()
    plt.savefig(fig_output_path + title + '.pdf', bbox_inches='tight')
Example #9
def eofunc_pcs(data,
               npcs=1,
               time_dim=0,
               pcscaling=0,
               weights=None,
               center=True,
               ddof=1,
               meta=False):
    """
    Computes the principal components (time projection) in the empirical orthogonal function
    analysis.

    Note: `eofunc_pcs` allows the user to perform the analysis that was previously done via the NCL function
    `eofunc_ts`. However, there are a few differences from the NCL workflow: (1) only `np.nan` is supported as a
    missing value; (2) EOFs are computed from the covariance matrix only, with no support for computation from the
    correlation matrix; (3) the percentage of non-missing points that must exist at any single point is no longer
    an input.

    This implementation uses `eofs` package (https://anaconda.org/conda-forge/eofs), which is built upon the
    following study: Dawson, Andrew, "eofs: A library for EOF analysis of meteorological, oceanographic, and
    climate data," Journal of Open Research Software, vol. 4, no. 1, 2016. Further information about this
    package can be found at: https://ajdawson.github.io/eofs/latest/index.html#

    This implementation provides a few conveniences to the user on top of `eofs` package that are described below
    in the Parameters section.

    Parameters
    ----------
    data : :class:`xarray.DataArray` or :class:`numpy.ndarray` or :class:`list`
        Should contain numbers or `np.nan` for missing value representation. It must be at least a 2-dimensional array.

        When input data is of type `xarray.DataArray`, `eofs.xarray` interface assumes the left-most dimension
        (i.e. `dim_0`) is the `time` dimension. In this case, that dimension should have the name "time".

        When input data is of type `numpy.ndarray` or `list`, this function still assumes the leftmost dimension
        to be the number of observations or `time` dimension; however, in this case, the user may specify otherwise.
        If the leftmost dimension of the input is not `time` (or number of observations), the user should pass
        `time_dim=x` to define which dimension must be treated as time or number of observations.

    npcs:
        A scalar integer that specifies the number of principal components (i.e. eigenvalues and eigenvectors) to be
        returned. This is usually less than or equal to the minimum number of observations or number of variables.

    time_dim:
        An integer defining the time dimension if it is not the leftmost dimension. When input data is of type
        `xarray.DataArray`, this is ignored (assuming `xarray.DataArray` has its leftmost dimension with the exact
        name 'time'). It must be between ``0`` and ``data.ndim - 1`` or it could be ``-1`` indicating the last
        dimension. Defaults to 0.

        Note: The `time_dim` argument allows the user to perform the EOF analysis that was previously done via the
        NCL function `eofunc_ts_n`.

    pcscaling:
        (From `eofs` package): Sets the scaling of the retrieved PCs. The following values are accepted:
            - 0 : Un-scaled PCs (default).
            - 1 : PCs are divided by the square-root of their eigenvalues.
            - 2 : PCs are multiplied by the square-root of their eigenvalues.

    weights:
        (From `eofs` package): An array of weights whose shape is compatible with those of the input array dataset.
        The weights can have the same shape as dataset or a shape compatible with an array broadcast (i.e., the shape
        of the weights can match the rightmost parts of the shape of the input array dataset). If the input array
        dataset does not require weighting then the value None may be used. Defaults to None (no weighting).

    center:
        (From `eofs` package): If True, the mean along the first axis of dataset (the time-mean) will be removed prior
        to analysis. If False, the mean along the first axis will not be removed. Defaults to True (mean is removed).

        The covariance interpretation relies on the input data being anomaly data with a time-mean of 0. Therefore this
        option should usually be set to True. Setting this option to True has the useful side effect of propagating
        missing values along the time dimension, ensuring that a solution can be found even if missing values occur
        in different locations at different times.

    ddof:
        (From `eofs` package): ‘Delta degrees of freedom’. The divisor used to normalize the covariance matrix is
        N - ddof where N is the number of samples. Defaults to 1.

    meta:
        If set to True and the input array is an Xarray, the metadata from the input array will be copied to the
        output array. Defaults to False.

    Returns
    -------
    pcs : :class:`xarray.DataArray`
        The principal components, with dimensions ``["pc", "time"]``.
    """

    # Checking number of principal components
    if npcs <= 0:
        raise ValueError(
            "ERROR eofunc_pcs: npcs must be a positive non-zero integer value."
        )

    data, solver = _generate_eofs_solver(data,
                                         time_dim=time_dim,
                                         weights=weights,
                                         center=center,
                                         ddof=ddof)

    pcs = solver.pcs(npcs=npcs, pcscaling=pcscaling)
    pcs = pcs.transpose()

    # Populate attributes for output
    attrs = {}

    if meta:
        attrs = data.attrs

    dims = ["pc", "time"]
    if meta:
        coords = {"time": data.coords[data.dims[time_dim]]}
    else:
        coords = {}

    return xr.DataArray(pcs, attrs=attrs, dims=dims, coords=coords)
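
A brief usage sketch (synthetic input; assumes the module defining
`eofunc_pcs` and its `_generate_eofs_solver` helper is importable):

import numpy as np

field = np.random.randn(120, 10, 12)     # leftmost axis is time
pcs = eofunc_pcs(field, npcs=3, pcscaling=1)
print(pcs.dims, pcs.shape)               # ('pc', 'time') (3, 120)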
Example #10
def main(args):
    #environmental constants  
    if platform.system() == 'Windows':
        in_dir='../examples/'
        out_dir='../regressors/'
        reg_dir='../regressors/'#${in_dir}'regresory_2013/'
        nc_gen=True
        pdf_gen=False
        plus = ''
    else:
        n_samples = int(os.environ['n_samples'])
        in_dir = os.environ['in_dir']
        #out_dir = os.environ['out_dir']
        reg_dir = os.environ['reg_dir']
        pdf_gen = os.environ['pdf_gen']
        nc_gen = os.environ['nc_gen']

    what_re = args.what_re
    vari = args.vari
    i_year = args.i_year
    s_year = args.s_year
    e_year = args.e_year
    in_file_name = args.in_file_name

    if args.verbose:
        print('dataset: ', what_re)
        print('variable: ', vari)
        print('initial year of dataset: ', i_year)
        print('initial year of analysis: ', s_year)
        print('end year of analysis: ', e_year)
        print('input filename: ', in_file_name)


    print('data opening')
    in_netcdf = in_dir + in_file_name
    print(in_netcdf)
    ds = xr.open_dataset(in_netcdf)
    #print(ds)
    lat_name = fce.get_coords_name(ds, 'latitude')
    lat = ds.coords[lat_name]
    nlat = lat.shape[0]

    lev_name = fce.get_coords_name(ds, 'pressure')
    if ds.coords[lev_name].attrs['units'] == 'Pa':
        lev =  ds.coords[lev_name]/100.
        ds[lev_name] = lev    
    else:
        lev = ds.coords[lev_name]

    
    n = ds.coords['time'].shape[0]
    #it may happen that the field is 3D (longitude is missing)
    try:
        lon_name = fce.get_coords_name(ds, 'longitude')
        lon = ds.coords[lon_name]
        nlon = lon.shape[0]
    except Exception:
        nlon = 1

    #print nlat, nlev, n, nlon

    #zonal mean
    if nlon != 1:
        uwnd = ds[vari].mean(lon_name)
    else:
        uwnd = ds[vari]
      
    #equatorial average and level selection
    sel_dict = {lev_name: fce.coord_Between(lev,10,50), lat_name: fce.coord_Between(lat,-10,10)}    
    zm_u = uwnd.sel(**sel_dict).mean(lat_name)
    #period selection
    times = pd.date_range(str(s_year)+'-01-01', str(e_year)+'-12-31', name='time', freq = 'M')
    zm_u_sel = zm_u.sel(time = times, method='ffill') #nearest
    #remove seasonality
    climatology = zm_u_sel.groupby('time.month').mean('time')
    anomalies = zm_u_sel.groupby('time.month') - climatology

    #print anomalies
    #sys.exit()
    
    #additional constants
    npca = 30
    norm=2 #5
    norms=3 #5
    what_sp = '' # what solar proxy?

    print("regressors' openning")
    global reg#, reg_names, nr
    reg, reg_names, history = fce.configuration_ccmi(what_re, what_sp, norm, 'no_qbo' , i_year, s_year, e_year, reg_dir)
    nr = reg.shape[1]
    #print(anomalies) 
    #extracting of other variability by MLR
    stacked = anomalies.stack(allpoints = [lev_name])
    stacked = stacked.reset_coords(drop=True)
    resids = stacked.groupby('allpoints').apply(xr_regression)
    resids = resids.rename({'dim_0': 'time'})
    resids['time'] = times
    #EOF analysis
    solver = Eof(resids.T, weights=None) 
    #sys.exit()

    #coslat = np.cos(np.deg2rad(lat)).clip(0.,1.)
    #wgts = np.sqrt(coslat)[np.newaxis,...]   

    for i in range(npca):
        var_eofs = solver.varianceFraction(neigs=i)
        #print var_eofs
        if np.sum(var_eofs) > 0.95:
            npca = i
            total_variance = np.sum(var_eofs)
            print(total_variance * 100, '% based on', i, 'components')
            break

    var_eofs = solver.varianceFraction(neigs=npca)
    pcs = solver.pcs(npcs=npca, pcscaling=1)
    nte = solver.northTest(neigs=npca, vfscaled=True)

    subdir = './'
    if pdf_gen:
        fig = plt.figure(figsize=(11,8))
        ax1 = fig.add_subplot(111)
        ax1.set_title(str(npca)+' PCAs cover '+str(np.round(total_variance*100, 2))+'% of total variance')
        for i in range(npca):
            #plotting
            pcs[:, i].plot(linewidth=2, ax=ax1, label='pca ' + str(i + 1))

        ax1.set_xlabel('time [years]')
        ax1.set_ylabel('QBO index')
        ax1.set_title('')
        ax1.legend(loc = 'best')
        plt.savefig(reg_dir+'qbo_'+what_re+'_pcas.pdf', bbox_inches='tight')
        plt.close(fig)    
                
    if nc_gen:
        #save to netcdf
        #print(pcs[:,0])
        for i in range(npca):
            pcs_ds = pcs[:,i].to_dataset(name = 'index')
            pcs_ds.to_netcdf(reg_dir+r'qbo_'+what_re+'_pc'+str(i+1)+pripona_nc)       
Example #11
'''
(From the `eofs` docs, on the `center` option:) If *True*, the mean along
the first axis of *dataset* (the time-mean) will be removed prior to
analysis. If *False*, the mean along the first axis will not be removed.
Defaults to *True* (mean is removed). The covariance interpretation relies
on the input data being anomaly data with a time-mean of 0. Therefore this
option should usually be set to *True*. Setting this option to *True* has
the useful side effect of propagating missing values along the time
dimension, ensuring that a solution can be found even if missing values
occur in different locations at different times.
'''
lambdas = solver.eigenvalues()
vf = solver.varianceFraction()
Nerror = solver.northTest(vfscaled=True)
pcs = solver.pcs()  # (time, mode)
eofs = solver.eofsAsCovariance()

plt.figure()
plt.subplot(3, 2, 1)
pcs[:, 0].plot()
ax = plt.gca()
ax.axhline(0, color='k')
ax.set_xlabel('Year')
ax.set_ylabel('PC1 amplitude')
plt.grid()
plt.subplot(3, 2, 2)
pcs[:, 1].plot()
ax = plt.gca()
ax.axhline(0, color='k')
ax.set_xlabel('Year')

def eof_orca_latlon_box(run, var, modes, lon_bnds, lat_bnds, pathfile, plot,
                        time, eoftype):

    if (var == 'temp'):
        key = 'votemper'
        key1 = "votemper"
    elif (var == 'sal'):
        key = 'vosaline'
        key1 = "vosaline"
    elif (var == 'MLD'):
        key = 'somxl010'
        key1 = "somxl010"

    # read data
    ds = xr.open_dataset(pathfile)
    #ds["time_counter"] = ds['time_counter']+(np.datetime64('0002-01-01')-np.datetime64('0001-01-01'))

    if time == 'comparison':
        ds = ds.sel(time_counter=slice('1958-01-01', '2006-12-31'))

    # cut box for EOF at surface
    if var == 'MLD':
        data = ds[key].sel(lon=slice(lon_bnds[0], lon_bnds[1]),
                           lat=slice(lat_bnds[0], lat_bnds[1]))
        #data = cut_latlon_box(ds[key][:,:,:],ds.lon,ds.lat,
        # lon_bnds,lat_bnds)
    else:
        data = ds[key][:, 0, :, :].sel(lon=slice(lon_bnds[0], lon_bnds[1]),
                                       lat=slice(lat_bnds[0], lat_bnds[1]))
        #data = cut_latlon_box(ds[key][:,0,:,:],ds.lon,ds.lat,
        # lon_bnds,lat_bnds)
    data = data.to_dataset()
    # detrend data
    data[key1] = (['time_counter', 'lat', 'lon'],
                  signal.detrend(data[key].fillna(0), axis=0, type='linear'))

    #data=data.where(data!=0)

    # remove seasonal cycle and drop unnecessary coordinates
    if 'time_centered' in list(data.coords):
        data = deseason_month(data).drop('month').drop(
            'time_centered')  # somehow pca doesn't work otherwise
    else:
        data = deseason_month(data).drop(
            'month')  # somehow pca doesn't work otherwise

    # set 0 values back to nan
    data = data.where(data != 0)

    # EOF analysis
    #Square-root of cosine of latitude weights are applied before the computation of EOFs.
    coslat = np.cos(np.deg2rad(data['lat'].values))
    coslat, _ = np.meshgrid(coslat, np.arange(0, len(data['lon'])))
    wgts = np.sqrt(coslat)
    solver = Eof(data[key], weights=wgts.transpose())
    pcs = solver.pcs(npcs=modes, pcscaling=1)
    if eoftype == 'correlation':
        eof = solver.eofsAsCorrelation(neofs=modes)
    elif eoftype == 'covariance':
        eof = solver.eofsAsCovariance(neofs=modes)
    else:
        eof = solver.eofs(neofs=modes)
    varfr = solver.varianceFraction(neigs=4)
    print(varfr)

    #----------- Plotting --------------------
    plt.close("all")
    if plot == 1:
        for i in np.arange(0, modes):
            fig = plt.figure(figsize=(8, 2))
            ax1 = fig.add_axes([0.1, 0.1, 0.3, 0.9],
                               projection=ccrs.PlateCarree())  # main axes
            ax1.set_extent(
                (lon_bnds[0], lon_bnds[1], lat_bnds[0], lat_bnds[1]))
            # discrete colormap
            cmap = plt.get_cmap('RdYlBu',
                                len(np.arange(10, 30)) -
                                1)  #inferno similar to cmo thermal
            eof[i, :, :].plot(ax=ax1,
                              cbar_kwargs={'label': 'Correlation'},
                              transform=ccrs.PlateCarree(),
                              x='lon',
                              y='lat',
                              add_colorbar=True,
                              cmap=cmap)
            gl = map_stuff(ax1)
            gl.xlocator = mticker.FixedLocator([100, 110, 120])
            gl.ylocator = mticker.FixedLocator(np.arange(-35, -10, 5))
            plt.text(116,
                     -24,
                     str(np.round(varfr[i].values, decimals=2)),
                     horizontalalignment='center',
                     verticalalignment='center',
                     transform=ccrs.PlateCarree(),
                     fontsize=8)

            ax2 = fig.add_axes([0.5, 0.1, 0.55, 0.9])  # main axes
            plt.plot(pcs.time_counter,
                     pcs[:, i].values,
                     linewidth=0.1,
                     color='k')
            anomaly(ax2, pcs.time_counter.values, pcs.values[:, i], [0, 0])
            ax2.set_xlim(
                [pcs.time_counter[0].values, pcs.time_counter[-1].values])
            plt.savefig(pathplots + 'eof_as' + eoftype + '_mode' + str(i) +
                        '_' + time + '_' + run + '_' + var + '.png',
                        dpi=300,
                        bbox_inches='tight',
                        pad_inches=0.1)
            plt.show()
    #----------------------------------------------

    return pcs, eof, varfr
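
A hypothetical call, purely illustrative: the NetCDF path and bounds are
placeholders, and helpers such as deseason_month, map_stuff, anomaly, and
pathplots come from the surrounding project.

pcs, eof, varfr = eof_orca_latlon_box(
    run='ctrl', var='temp', modes=3,
    lon_bnds=(100, 120), lat_bnds=(-35, -15),
    pathfile='orca_surface.nc', plot=0, time='full', eoftype='correlation')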
Example #14
File: EOF.py Project: ilebras/OSNAP
         str(int(solver_along.varianceFraction()[0].values * 100)) +
         '% of variance')
    plot(squeeze(solver_along.eofs()[1, :]),
         -dat.depth[::skipnum],
         label='mode 2: ' +
         str(int(solver_along.varianceFraction()[1].values * 100)) +
         '% of variance')
    gca().set_yticklabels('')
    xlabel('across-stream velocity [m/s]')
    xlim([-1, 1])
    legend(loc=(0.2, 1.01))
    suptitle('CF' + str(ii + 1))
    savefig('../figures/VertModes/eof_cf' + str(ii + 1) + '.png')

    figure(figsize=(12, 4))
    solver_across.pcs().sel(mode=0).plot(label='along-stream')
    solver_along.pcs().sel(mode=0).plot(label='across-stream')
    legend()
    title('CF' + str(ii + 1) + ' first mode time series')
    savefig('../figures/VertModes/eof_1804/eof_cf' + str(ii + 1) +
            '_tseries.png')
    #
    # figure(figsize=(12,4))
    # solver.pcs().sel(mode=1).plot()
    # title('CF'+str(ii+1)+' second mode time series')

############################################################################
## Fit exponential to mean density and see what the solution looks like!
############################################################################

Example #15
# (commented out in the source: per-gridpoint detrending of the observed SST)
#        valid = ~np.isnan(sst_obs[:, la, lo])
#        if valid.any():
#            sst_obs[:, la, lo] = signal.detrend(sst_obs[:, la, lo], axis=0,
#                                                type='linear')
#        elif not valid.all():
#            sst_obs[:, la, lo] = np.nan

# EOF for model
coslat_mdl = np.cos(np.deg2rad(sst_mdl.coords['lat'].values))
wgts_mdl = np.sqrt(coslat_mdl)[..., np.newaxis]
solver_mdl = Eof(sst_mdl, weights=wgts_mdl, center=True)
lambdas_mdl = solver_mdl.eigenvalues()
vf_mdl = solver_mdl.varianceFraction()
Nerror_mdl = solver_mdl.northTest(vfscaled=True)
pcs_mdl = solver_mdl.pcs() #(time, mode)
eofs_mdl = solver_mdl.eofs()
# EOF for obs
coslat_obs = np.cos(np.deg2rad(sst_obs.coords['lat'].values))
wgts_obs = np.sqrt(coslat_obs)[..., np.newaxis]
solver_obs = Eof(sst_obs, weights=wgts_obs, center=True)
lambdas_obs = solver_obs.eigenvalues()
vf_obs = solver_obs.varianceFraction()
Nerror_obs = solver_obs.northTest(vfscaled=True)
pcs_obs = solver_obs.pcs() #(time, mode)
eofs_obs = solver_obs.eofs()




## plotting
Example #16
    ds_spring_sib2 = ds_spring_sib2.sel(spring=idx)
    ds_spring = ds_spring.sel(spring=idx)


    plt.scatter(ds_spring_sib2.min('time').to_dataframe(), ds_spring.min('time').to_dataframe())



    #######################
    # PCA Analysis
    #######################

    nbpcs = 3

    solver_sib2 = Eof(ds_spring_sib2.dropna(dim="time", how="all"))
    pcas_sib2 = solver_sib2.pcs(npcs=nbpcs, pcscaling=1)
    eofs_sib2 = solver_sib2.eofs(neofs=nbpcs, eofscaling=1)

    solver = Eof(ds_spring.dropna(dim="time", how="all"))
    pcas = solver.pcs(npcs=nbpcs, pcscaling=1)
    eofs = solver.eofs(neofs=nbpcs, eofscaling=1)

    fig, axes = plt.subplots(3, 4, figsize=(20, 20))
    pcas.to_dataframe().unstack().plot(ax=axes[0,0])
    pcas_sib2.to_dataframe().unstack().plot(ax=axes[0,1])

    df_eofs = eofs.to_dataframe().unstack().T
    df_eofs_sib2 = eofs_sib2.to_dataframe().unstack().T
    df_eofs.index = df_eofs.index.levels[1]
    df_eofs_sib2.index = df_eofs_sib2.index.levels[1]
Example #17
# (2) Compare to see differences

start5 = time.time()

os.chdir("/home/ubuntu")
lon = 180
lat = 90
dim = lon * lat
months = 24

data = np.resize(x1, [dim, months])  # x1 comes from earlier context; unused below

solver = Eof(xr.DataArray(anomalies.data, dims=['time', 'lat', 'lon']))

pcs = solver.pcs(npcs=3, pcscaling=1)
eofs = solver.eofs(neofs=5, eofscaling=1)

variance_fractions = solver.varianceFraction(neigs=3)
print(variance_fractions)

# note: despite the file names, these rows are the EOF spatial patterns
with open('PC1.csv', 'w') as myFile1:
    writer = csv.writer(myFile1)
    writer.writerows(eofs[0, :, :].data)

with open('PC2.csv', 'w') as myFile2:
    writer = csv.writer(myFile2)
    writer.writerows(eofs[1, :, :].data)
Example #18
import xarray as xr
from eofs.xarray import Eof
import datetime
import os

# Read preprocessed data.
DATA_FILE = "/LFASGI/sandroal/data_sets/GIMMS/ppdata_ndvi.nc" 
DS = xr.open_dataset(DATA_FILE)

# Create an EOF solver to do the EOF analysis. Memory intensive operation.
solver = Eof(DS.ndvi)

# Retrieve EOFs, principal component time series, fraction of explained 
# variance, and eigenvalues as xarray DataArray objects for all modes.
EOFs = solver.eofs() 
PCs = solver.pcs()  
FRACs = solver.varianceFraction() 
EIGs = solver.eigenvalues() 

# Attributes for xarray DataSet objects.
attrs = {}
attrs["Description"] = "Empirical orthogonal functions to NDVI (GIMMS) " + \
                       "in its original temporal and spatial resolutions"
attrs["Build"] = "By Alex Araujo"
attrs["Date"] = datetime.datetime.now().strftime("%B %d, %Y; %Hh:%Mmin:%Ss")
attrs["Source"] = os.path.abspath(__file__)

# Set these attributes to results. Must transform from xarray DataArray to 
# DataSets before exporting results as netcdf files.
DAs = [EOFs, PCs, FRACs, EIGs]
names = ["eofs", "pcs", "fracs", "eigs"]
Example #19
import numpy as np
import xarray as xr
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import cartopy.feature as cfeature
from eofs.xarray import Eof
from eofs.examples import example_data_path

# Read SST anomalies using the xarray module. The file contains November-March
# averages of SST anomaly in the central and northern Pacific.
filename = example_data_path('sst_ndjfm_anom.nc')
sst = xr.open_dataset(filename)['sst']

# Create an EOF solver to do the EOF analysis. Square-root of cosine of
# latitude weights are applied before the computation of EOFs.
coslat = np.cos(np.deg2rad(sst.coords['latitude'].values))
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(sst, weights=wgts)

# Retrieve the leading EOF, expressed as the correlation between the leading
# PC time series and the input SST anomalies at each grid point, and the
# leading PC time series itself.
eof1 = solver.eofsAsCorrelation(neofs=1)
pc1 = solver.pcs(npcs=1, pcscaling=1)

# Plot the leading EOF expressed as correlation in the Pacific domain.
clevs = np.linspace(-1, 1, 11)
ax = plt.axes(projection=ccrs.PlateCarree(central_longitude=190))
fill = eof1[0].plot.contourf(ax=ax, levels=clevs, cmap=plt.cm.RdBu_r,
                             add_colorbar=False, transform=ccrs.PlateCarree())
ax.add_feature(cfeature.LAND, facecolor='w', edgecolor='k')
cb = plt.colorbar(fill, orientation='horizontal')
cb.set_label('correlation coefficient', fontsize=12)
ax.set_title('EOF1 expressed as correlation', fontsize=16)

# Plot the leading PC time series.
plt.figure()
pc1[:, 0].plot(color='b', linewidth=2)
ax = plt.gca()
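
A common follow-up, not part of the original example, is to check how well
the leading modes are separated using North et al. (1982) typical errors;
a minimal sketch against the same solver:

vf = solver.varianceFraction(neigs=5)
errors = solver.northTest(neigs=5, vfscaled=True)
# adjacent modes whose vf +/- error ranges overlap are not well separated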
Example #20
sst = sst[:, :-1, :-1]
tendsst = tendsst[:, :-1, :-1]

tot_area = np.sum(areas)

weights = areas / tot_area

#Calculate EOFs for SST, Qs and Qo. Should weight by area?
solver = Eof(sst, weights=weights)
sst_eof = solver.eofs(neofs=3, eofscaling=2)

sst_eof_varfracs = solver.varianceFraction()

solver = Eof(Qr, weights=weights)
Qo_eof = solver.eofs(neofs=3, eofscaling=2)
Qo_pc = solver.pcs(npcs=3, pcscaling=2)

Qo_eof_varfracs = solver.varianceFraction()

Qo_rec = solver.reconstructedField(5)

Qo_rec_var = Qo_rec.var(dim='time')

#get projection (pseudo-PCs) associated with Qo EOFs
# Qo_eof_projsst = solver.projectField(sst, neofs=3)
# Qo_eof_projQo = solver.projectField(Qr, neofs=3)

# Qo_eof_projsst_pcs = Qo_eof_projsst[:,0]
# Qo_eof_projQo_pcs = Qo_eof_projQo[:,0]

#standardize the pseudo-PCs
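
A hypothetical completion of the standardization step named above, reusing
the projection call that is commented out (pseudo-PC dims are (time, mode)):

pseudo_pcs = solver.projectField(sst, neofs=3)
pseudo_pcs_std = ((pseudo_pcs - pseudo_pcs.mean(dim='time'))
                  / pseudo_pcs.std(dim='time'))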