Ejemplo n.º 1
0
def eofs_as(dat):
    """Compute the ten leading EOFs and PCs of the masked ``curl`` field.

    The field returned by ``climatologia_xarray(dat['curl'])`` is masked with
    the third value returned by ``get_coasts`` and passed to an ``Eof`` solver
    with sqrt(cos(lat)) weights.  The solver's ``ddof`` is the integer part of
    the mean of the per-grid-point values returned by ``get_eddof``.

    Parameters
    ----------
    dat
        Object exposing ``dat['curl']``, ``dat.lat`` and ``dat.lon``
        (presumably an xarray Dataset -- TODO confirm against callers).

    Returns
    -------
    eof, pc, varfrac, x, y, edof
        ``eof``: the 10 leading EOFs (``eofscaling=2``); ``pc``: the 10
        leading PCs (``pcscaling=1``); ``varfrac``: variance fraction of
        every mode; ``x``, ``y``: ``np.meshgrid(dat.lon, dat.lat)``;
        ``edof``: (lat, lon) array of ``get_eddof`` results, NaN where the
        mask is set.

    Side effects
    ------------
    Rebinds the module-level name ``land`` to the mask from ``get_coasts``.
    """
    from get_eddof import get_eddof

    global land  # kept: other code may read the module-level mask

    A = climatologia_xarray(dat['curl']).values
    EC, WC, land = get_coasts(dat.lat, dat.lon)

    # Replicate the mask along the leading axis by broadcasting, then build
    # the masked array exactly once (it used to be rebuilt on every pass of
    # the fill loop, copying the whole field each time).
    msk = np.empty(np.shape(A))
    msk[:] = land
    B = np.ma.array(A, mask=msk)

    # Per-grid-point get_eddof result; masked points stay NaN so that
    # nanmean below ignores them.
    n_lat = len(dat.lat)
    n_lon = len(dat.lon)
    edof = np.full((n_lat, n_lon), np.nan)
    for i in range(n_lat):
        for j in range(n_lon):
            if not msk[0, i, j]:
                edof[i, j] = get_eddof(B[:, i, j])

    dof = int(np.nanmean(edof))

    # sqrt(cos(lat)) weights, with a trailing axis so they broadcast over
    # longitude.
    coslat = np.cos(np.deg2rad(dat.lat.values)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(B, center=True, weights=wgts, ddof=dof)

    eof = solver.eofs(neofs=10, eofscaling=2)
    pc = solver.pcs(npcs=10, pcscaling=1)
    varfrac = solver.varianceFraction()

    x, y = np.meshgrid(dat.lon, dat.lat)

    return eof, pc, varfrac, x, y, edof
def calc_HadISST_monthly_residual_EOFs(histo_sy, histo_ey, ref_start, ref_end, run_n, n_eofs=22):
    """Compute and save EOFs, PCs and eigenvalues of the monthly SST residuals.

    Reads the residual file named by ``get_HadISST_monthly_residuals_fname``,
    masks every value below -1000, runs an ``Eof`` analysis with
    sqrt(cos(lat)) weights, and writes the EOFs, PCs and eigenvalues to the
    three files named by the matching ``get_HadISST_monthly_residual_*_fname``
    helpers.  The eigenvalue array shape is printed to stdout.

    Parameters
    ----------
    histo_sy, histo_ey, run_n
        Passed straight through to the file-name helpers.
    ref_start, ref_end
        Not used by this function; retained so existing callers keep working.
    n_eofs
        Number of modes to retain (default 22).

    Returns
    -------
    None
    """
    # load the already calculated residuals
    # note that we don't have to subtract the annual cycle any more as the
    # residuals are with respect to a smoothed version of the monthly ssts
    resid_fname = get_HadISST_monthly_residuals_fname(histo_sy, histo_ey, run_n)

    resid_mon_fh = netcdf_file(resid_fname, 'r')
    try:
        sst_var = resid_mon_fh.variables["sst"]
        lats_var = resid_mon_fh.variables["latitude"]
        lons_var = resid_mon_fh.variables["longitude"]
        attrs = sst_var._attributes
        ssts = numpy.array(sst_var[:])
        # Values below -1000 are treated as missing.
        sst_resids = numpy.ma.masked_less(ssts, -1000)

        # calculate the EOFs and PCs
        coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
        wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
        eof_solver = Eof(sst_resids, center=True, weights=wgts)
        pcs = eof_solver.pcs(npcs=n_eofs)
        eofs = eof_solver.eofs(neofs=n_eofs)
        evs = eof_solver.eigenvalues(neigs=n_eofs)
        # Give the eigenvalues a leading singleton axis for the writer.
        evs = evs.reshape([1, evs.shape[0]])
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(evs.shape)

        # get the output names
        out_eofs_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
        out_pcs_fname = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
        out_evs_fname = get_HadISST_monthly_residual_EVs_fname(histo_sy, histo_ey, run_n)

        # save the eofs and pcs; the input file stays open until the writers
        # are done because they are handed its latitude/longitude variables
        save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
        out_pcs = pcs.reshape([pcs.shape[0], 1, pcs.shape[1]])
        save_pcs(out_pcs_fname, out_pcs, attrs)
        save_eigenvalues(out_evs_fname, evs, attrs)
    finally:
        # Always release the input file, even if the analysis or a writer fails.
        resid_mon_fh.close()
Ejemplo n.º 3
0

# Input files: the sine-wave data set and the 2-D bull's-eye data set.
filename1 = 'sine_wave_data1.nc'
filename2 = '2D_bulls_eyes.nc'
# Open both files read-only and hand the open handles to xarray through its
# NetCDF4DataStore wrapper.
# NOTE(review): `Dataset` is presumably netCDF4.Dataset -- confirm against
# the file's imports.  Neither handle is closed in the lines visible here.
a = Dataset(filename1, mode='r')
b = Dataset(filename2, mode='r')
dataset1 = xr.open_dataset(xr.backends.NetCDF4DataStore(a))
dataset2 = xr.open_dataset(xr.backends.NetCDF4DataStore(b))
# Transpose the 'data' variable, standardise it along axis 0 (subtract the
# mean, divide by the standard deviation) and drop to a plain NumPy array.
sinData = dataset1['data'].T
sinData = (sinData - sinData.mean(axis=0)) / sinData.std(axis=0)
sinData = sinData.values
# Loaded here but not used in the EOF analysis below.
bullseyeData = dataset2['data']

#%% EOF analysis
# Unweighted solver over the standardised sine-wave data.
solver = Eof(sinData)
eigenvalues = solver.eigenvalues()  # Get eigenvalues
EOFs = solver.eofs(eofscaling=0)  # Get EOFs (eofscaling=0: no scaling applied)
EOFs_reg = solver.eofsAsCorrelation(
)  # Get EOFs as correlation between the PCs and the original data
PCs = solver.pcs(pcscaling=1)  # Get PCs (pcscaling=1: scaled to unit variance)

# Get variance explained (percent, one decimal place) and the number of PCs
# to keep.
# NOTE(review): cumSUM is defined elsewhere; presumably it returns how many
# leading modes are needed to reach 90 % cumulative variance -- confirm.
VarExplain = np.round(solver.varianceFraction() * 100, 1)
numPCs2Keep = cumSUM(VarExplain, 90)

# Calculate EOFs: scale the two leading unscaled EOFs by the square root of
# their eigenvalue, and pick out the matching correlation maps.
EOF1 = EOFs[0, :] * np.sqrt(eigenvalues[0])  # Get EOF 1 & scale it
EOF2 = EOFs[1, :] * np.sqrt(eigenvalues[1])  # Get EOF 2 & scale it
EOF1_reg = EOFs_reg[0, :]
EOF2_reg = EOFs_reg[1, :]
# Leading PC time series (column 0 of the PC matrix).
stdPC1 = PCs[:, 0]
Ejemplo n.º 4
0
def PCA_Analyze(name, framestart, framestop, destination, numberpca=None):
    """Completes unweighted and unscaled Principal Component Analysis on data.

    The U and V velocity components of every matching ``.npz`` file are
    combined into one complex field (U real, V imaginary), ordered by frame
    number, and passed to an ``Eof`` solver.  One streamline plot is saved
    per retained mode.

    **Arguments:**

    *name*
            Glob pattern (string) matching the ``.npz`` files that hold the
            ``UU`` and ``VV`` arrays, relative to the directory the program
            runs from.  Use an asterisk (*) in place of the frame number so
            that every frame file is picked up.  The frame number of a file
            is the last run of digits in its path.

    *framestart*
            Index of the first frame to analyze, counted in the
            frame-number-sorted sequence of loaded files.

    *framestop*
            Index one past the last frame to analyze (the frames are taken
            with the half-open slice ``[framestart:framestop]``).

    *destination*
            File location for the graphs to be saved.  Must contain one
            integer placeholder such as ``%d``, which is filled with the
            mode index via ``destination % i``.  Example:
            'out/Vertical Velocity/arrays/another/graph_%d.png'

    **Optional keyword arguments:**

    *numberpca*
            Number of eigenvalues/PCAs to be calculated.  ``None`` (the
            default) is passed through to the solver, which then returns
            all of them.

    **Example:**
            PCA_Analyze('../out/velocity*.npz',0,5,'../out/mvavgtur_%d.png',numberpca=4)

    """
    #####Extracts numpy array data, keyed by frame number, for use in analysis#####
    UU = {}
    VV = {}
    for np_name in glob.glob(name):
        frame_id = re.findall(r'\d+', np_name)[-1]
        with np.load(np_name) as data:
            UU[frame_id] = data['UU']
            VV[frame_id] = data['VV']

    #####Orders the frames and stacks them into numpy arrays#####
    # Sort numerically: the ids are digit strings, and a plain string sort
    # would put frame "10" before frame "2" when names are not zero-padded.
    frame_ids = sorted(UU, key=int)
    luu = np.asarray([UU[frame_id] for frame_id in frame_ids])
    lvv = np.asarray([VV[frame_id] for frame_id in frame_ids])

    #####Puts the U and V components into one complex array with the U as the real component and the V as the imaginary#####
    velgrid = luu + (1.j * lvv)

    #####PCA#####
    solver = Eof(velgrid[framestart:framestop, :, :])
    pca = np.array(solver.eofs(neofs=numberpca))
    eigen = np.asarray(solver.eigenvalues(neigs=numberpca))

    length = eigen.shape[0]
    print(length)

    #####Graphs each PCA#####
    # The plotting grid only depends on the spatial shape, so build it once.
    n_rows, n_cols = pca.shape[1:]
    y, x = np.mgrid[0:n_rows, 0:n_cols]
    for i in range(length):
        mode_u = pca.real[i, :, :]
        mode_v = pca.imag[i, :, :]
        # NOTE(review): the title text says "Percent Variance" but the value
        # shown is the raw eigenvalue -- confirm which one is intended.
        eig = np.array_str(eigen[i])
        plt.figure()
        plt.streamplot(x, y, mode_u * -1., mode_v * -1., cmap='nipy_spectral')
        plt.suptitle("PCA Analysis. Associated Percent Variance: ")
        plt.title(eig, fontsize=10)
        plt.savefig(destination % i)
        plt.close()
Ejemplo n.º 5
0
    fldpa = fldpa1

# Latitude weights for the EOF solver.  The square root of cos(lat) is used
# because the solver works on the covariance of the weighted field, where
# the weights appear squared -- so this amounts to cos(lat) area weighting.
# The trailing axis lets the weights broadcast across longitude.
coslat = np.cos(np.deg2rad(lat)).clip(0.0, 1.0)
wgts = np.sqrt(coslat)[..., np.newaxis]

# ---- control field ----
solverc = Eof(fldca, weights=wgts)
# Take the leading `enum` modes as correlation or covariance maps, then keep
# only mode number `enum` (1-based) with singleton axes removed.
eof1c = (solverc.eofsAsCorrelation(neofs=enum) if docorr
         else solverc.eofsAsCovariance(neofs=enum))
eof1c = eof1c[enum - 1, ...].squeeze()

eigsc = solverc.eigenvalues()
vexpc = eigsc[enum - 1] / eigsc.sum() * 100  # percent variance explained

# One row of four panels; the control map goes in the first one, with the
# rounded explained variance as its y-label.
fig, axs = plt.subplots(1, 4)
fig.set_size_inches(12, 5)
ax = axs[0]
cplt.kemmap(
    eof1c, lat, lon,
    type=type,
    title=sim + " control EOF" + str(enum),
    axis=ax,
    cmin=cmin,
    cmax=cmax,
)
ax.set_ylabel(str(np.round(vexpc)))

# ---- perturbed field: same mode selection as for the control ----
solverp = Eof(fldpa, weights=wgts)
eof1p = (solverp.eofsAsCorrelation(neofs=enum) if docorr
         else solverp.eofsAsCovariance(neofs=enum))
eof1p = eof1p[enum - 1, ...]
Ejemplo n.º 6
0
    with np.load(np_name) as data:
        UU[np_name[29:34]] = data['UU']
        VV[np_name[29:34]] = data['VV']

# Order the frames by key.  sorted() returns a list on both Python 2 and
# Python 3; calling .sort() on the result of dict.keys() only works on
# Python 2, where keys() is a list.
uframes = sorted(UU)
vframes = sorted(VV)

# Append the per-frame arrays to the accumulator lists in frame order.
lUU.extend(UU[frame] for frame in uframes)
lVV.extend(VV[frame] for frame in vframes)

luu = np.asarray(lUU)
lvv = np.asarray(lVV)

####PCA####
# Combine the two components into one complex field: U real, V imaginary.
velgrid = luu + (1.j * lvv)

solver = Eof(velgrid[2118:2358, :, :])  #*# Choose which frames
pca = solver.eofsAsCovariance(neofs=4)  #*# neofs and type of eofs
eigen = solver.eigenvalues(neigs=4)  #*# neigs

####Save new numpy array for later graphing####

np.savez('PCA.npz', pca=pca, eigen=eigen)  #*#
Ejemplo n.º 7
0
# Second solver, built on the anomaly field; `solver` itself is created
# earlier in the script.
solver_anom = Eof(temps_anom)

# EOF profiles: one figure per solver, every mode drawn against `alts` and
# labelled with its index.
_profile_plots = (
    (solver, 'eofs_temps.pdf'),
    (solver_anom, 'eofs_temps_anom.pdf'),
)
for _solv, _pdf_name in _profile_plots:
    fig = plt.figure()
    for i, eo in enumerate(_solv.eofs()):
        plt.plot(eo, alts, label=i)
    plt.legend()
    fig.savefig(cartou + _pdf_name)

# Bar charts of the eigenvalues and of the variance fractions, again one
# figure per solver.  The bar positions are fixed at six, so each solver is
# expected to return six values.
_bar_plots = (
    (solver.eigenvalues, 'eigenvalues_temps.pdf'),
    (solver_anom.eigenvalues, 'eigenvalues_temps_anom.pdf'),
    (solver.varianceFraction, 'varfrac_temps.pdf'),
    (solver_anom.varianceFraction, 'varfrac_temps_anom.pdf'),
)
for _get_values, _pdf_name in _bar_plots:
    fig = plt.figure()
    plt.bar(np.arange(6), _get_values())
    fig.savefig(cartou + _pdf_name)

fig = plt.figure()
Ejemplo n.º 8
0
            except ValueError:
                sys.exit("Exiting: timeseries have different lengths")

    if args.normalize:
        eof_data_std = np.std(eof_data, axis=1)
        eof_data = eof_data.T / np.std(eof_data, axis=1)
    else:
        #transpose so time is first dimension
        eof_data = eof_data.T

# Create an EOF solver to do the EOF analysis.  No weights are applied and
# the data are not centred by the solver (center=False).
# The solver assumes the first dimension is time; that is not true when the
# time series themselves are of interest.
print("Solving for n={} modes".format(args.eof_num))
solver = Eof(eof_data, center=False)
pcs = solver.pcs(npcs=args.eof_num)
eigval = solver.eigenvalues(neigs=args.eof_num)
varfrac = solver.varianceFraction(neigs=args.eof_num)
eofs = solver.eofs(neofs=args.eof_num)
eofcorr = solver.eofsAsCorrelation(neofs=args.eof_num)
eofcov = solver.eofsAsCovariance(neofs=args.eof_num)
"""---------------------------------Report-----------------------------------"""
### Print Select Results to file
outfile = args.outfile + '.txt'
# Open the report once and close it deterministically.  Previously every
# print() opened its own handle and never closed it explicitly.  Mode "w"
# starts the report afresh, as the first print did before.
with open(outfile, "w") as report:
    print("EOF Results:", file=report)
    print("------------", file=report)

    # NOTE(review): `filename` is read here before the loop below rebinds
    # it, so this relies on a value assigned earlier in the script.
    print("File path: {}".format("/".join(filename.split('/')[:-1])),
          file=report)
    for key, filename in (files.items()):
        print("Files input: {}".format(filename.split('/')[-1]),
              file=report)
Ejemplo n.º 9
0
# Build a 2-D weight field by repeating the 1-D array `way` along a new
# second axis, len(lon_at) times.
# NOTE(review): `way` presumably holds per-latitude weights -- confirm where
# it is defined.
weightf = np.repeat(way[:, np.newaxis], len(lon_at),
                    axis=1)  # weighting function (needed because of the latitude dependence)
atemp_era5 = signal.detrend(
    hgt500_era5, axis=0)  # detrend the 500 hPa geopotential height data along axis 0
# Stack the detrended, weighted data into one (nt * ny, nlat, nlon) array:
# block `iy` of `nt` consecutive rows receives atemp_era5[iy] times the weights.
atemp_era5_pre = np.zeros((nt * ny, nlat, nlon))
for iy in np.arange(ny):
    atemp_era5_pre[iy * nt:iy * nt + nt] = atemp_era5[iy] * weightf[None, :, :]

### We did not use an n-day moving average, as some other studies do, partly because our (Jiacheng & Zhuo) original goal
###   was to evaluate GEFS v12 reforecasts, whose length is usually 16 days; that made it impossible
###   to apply an n-day moving average or other time-filtering methods. Keeping 4 EOFs may already filter out some noise.
### To be consistent with other researchers, one can add n-day moving-average code above.
# EOF analysis
# The weights were already multiplied into the data above, so no `weights`
# argument is passed to the solver.
solver = Eof(atemp_era5_pre, center=True)
pcs = solver.pcs()
mid_eig = solver.eigenvalues()
# NOTE(review): mid_eofs and eofs are two separate calls with identical arguments.
mid_eofs = solver.eofs()
eofs = solver.eofs()

### Print explained variance when using 4 EOFs
#var_explained_era5= np.cumsum(mid_eig)/np.sum(np.sum(mid_eig))
#print(var_explained_era5[3])
#0.5316330300316366

# NOTE(review): `noef` is defined elsewhere; per the comment below it is
# presumably 4 -- confirm.
reconstruction_era5 = solver.reconstructedField(
    noef)  # use the leading EOFs (4, per the note above) to reconstruct the hgt500 field

### The K-means method needs a 2-D data format: number of days x horizontal fields
# Flatten each reconstructed map into one row of length nlat * nlon.
atemp_era5_post = np.zeros((ny * nt, nlat * nlon))
for i in np.arange(ny * nt):
    atemp_era5_post[i] = (reconstruction_era5[i]).flatten()