def eofs_as(dat):
    """Run an EOF analysis on the ``'curl'`` field of *dat* with land masked out.

    The field returned by ``climatologia_xarray(dat['curl'])`` is masked with
    the land mask from ``get_coasts`` (the same 2-D mask is applied to every
    step along the first axis), an effective-degrees-of-freedom estimate is
    computed for every unmasked grid point, and the ten leading EOFs/PCs are
    extracted with sqrt(cos(lat)) weights.

    Side effect: the module-level global ``land`` is (re)assigned to the land
    mask returned by ``get_coasts``.

    Parameters
    ----------
    dat : dataset-like
        Must provide ``dat['curl']``, ``dat.lat`` and ``dat.lon``.
        Presumably an ``xarray.Dataset`` with dimensions (time, lat, lon)
        -- TODO confirm against the caller.

    Returns
    -------
    eof : array
        First 10 EOFs (``eofscaling=2``).
    pc : array
        First 10 principal components (``pcscaling=1``).
    varfrac : array
        Fraction of variance for every mode.
    x, y : ndarray
        ``np.meshgrid(dat.lon, dat.lat)`` coordinate grids.
    edof : ndarray
        ``(len(lat), len(lon))`` array of ``get_eddof`` values, NaN where the
        grid point is masked.
    """
    global land

    A = climatologia_xarray(dat['curl']).values
    _, _, land = get_coasts(dat.lat, dat.lon)

    # Replicate the 2-D land mask along the leading axis in one broadcast
    # assignment instead of a Python loop over every time step.
    msk = np.empty(np.shape(A))
    msk[...] = land
    B = np.ma.array(A, mask=msk)

    from get_eddof import get_eddof

    n_lat = len(dat.lat)
    n_lon = len(dat.lon)
    # Start from NaN everywhere; only unmasked points get a real estimate.
    edof = np.full([n_lat, n_lon], np.nan)
    for i in range(n_lat):
        for j in range(n_lon):
            if not msk[0, i, j]:
                edof[i, j] = get_eddof(B[:, i, j])

    # NOTE(review): the mean effective degrees of freedom is handed to Eof as
    # ``ddof`` (which the eofs docs describe as *delta* degrees of freedom,
    # i.e. the divisor is N - ddof). Kept as in the original -- confirm that
    # this is the intended normalisation.
    dof = int(np.nanmean(edof))

    # sqrt(cos(lat)) weights, one per latitude row, broadcast over longitude.
    coslat = np.cos(np.deg2rad(dat.lat.values)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]

    solver = Eof(B, center=True, weights=wgts, ddof=dof)
    eof = solver.eofs(neofs=10, eofscaling=2)
    pc = solver.pcs(npcs=10, pcscaling=1)
    varfrac = solver.varianceFraction()

    x, y = np.meshgrid(dat.lon, dat.lat)
    return eof, pc, varfrac, x, y, edof
def calc_HadISST_monthly_residual_EOFs(histo_sy, histo_ey, ref_start, ref_end,
                                       run_n, n_eofs=22):
    """Compute EOFs, PCs and eigenvalues of the monthly SST residuals and save them.

    The residual file located by ``get_HadISST_monthly_residuals_fname`` is
    read, values below -1000 are masked, and an ``Eof`` solver with
    sqrt(cos(lat)) weights is used to extract the leading ``n_eofs`` modes.
    The EOFs, PCs and eigenvalues are written to the files named by the
    matching ``get_HadISST_monthly_residual_*_fname`` helpers.

    Parameters
    ----------
    histo_sy, histo_ey, run_n
        Passed unchanged to the filename helpers (input and outputs).
    ref_start, ref_end
        Not used by this function; kept for interface compatibility.
    n_eofs : int, optional
        Number of EOFs / PCs / eigenvalues to compute and save (default 22).

    Returns
    -------
    None
    """
    # load the already calculated residuals
    resid_fname = get_HadISST_monthly_residuals_fname(histo_sy, histo_ey, run_n)
    # note that we don't have to subtract the annual cycle any more as the
    # residuals are with respect to a smoothed version of the monthly ssts
    resid_mon_fh = netcdf_file(resid_fname, 'r')
    try:
        sst_var = resid_mon_fh.variables["sst"]
        lats_var = resid_mon_fh.variables["latitude"]
        lons_var = resid_mon_fh.variables["longitude"]
        attrs = sst_var._attributes

        # Copy the data out of the file, then mask missing values with a
        # hard-coded threshold (the file's _FillValue is not consulted).
        ssts = numpy.array(sst_var[:])
        sst_resids = numpy.ma.masked_less(ssts, -1000)

        # calculate the EOFs and PCs
        coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
        wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
        eof_solver = Eof(sst_resids, center=True, weights=wgts)
        pcs = eof_solver.pcs(npcs=n_eofs)
        eofs = eof_solver.eofs(neofs=n_eofs)
        varfrac = eof_solver.varianceFraction(neigs=n_eofs)
        evs = eof_solver.eigenvalues(neigs=n_eofs)
        # save_eigenvalues is given a 2-D (1, n) array
        evs = evs.reshape([1, evs.shape[0]])
        # Parenthesised form prints the same text on Python 2 and Python 3.
        print(evs.shape)

        # get the output names
        out_eofs_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
        out_pcs_fname = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
        out_evs_fname = get_HadISST_monthly_residual_EVs_fname(histo_sy, histo_ey, run_n)

        # save the eofs and pcs
        save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
        # PCs are saved with a singleton middle axis: (time, 1, mode)
        out_pcs = pcs.reshape([pcs.shape[0], 1, pcs.shape[1]])
        save_pcs(out_pcs_fname, out_pcs, attrs)
        save_eigenvalues(out_evs_fname, evs, attrs)
    finally:
        # Always release the input file, even if the analysis or a save fails.
        resid_mon_fh.close()
# Open the two input NetCDF files and wrap them as xarray datasets.
# NOTE(review): `a` and `b` are not closed anywhere in the visible lines.
filename1 = 'sine_wave_data1.nc'
filename2 = '2D_bulls_eyes.nc'
a = Dataset(filename1, mode='r')
b = Dataset(filename2, mode='r')
dataset1 = xr.open_dataset(xr.backends.NetCDF4DataStore(a))
dataset2 = xr.open_dataset(xr.backends.NetCDF4DataStore(b))
# Transpose the 'data' variable, then standardise it along axis 0
# (subtract the mean and divide by the standard deviation).
sinData = dataset1['data'].T
sinData = (sinData - sinData.mean(axis=0)) / sinData.std(axis=0)
sinData = sinData.values  # plain NumPy array for the solver
bullseyeData = dataset2['data']
#%% EOF analysis
solver = Eof(sinData)
eigenvalues = solver.eigenvalues()  # Get eigenvalues
EOFs = solver.eofs(eofscaling=0)  # Get EOFs
EOFs_reg = solver.eofsAsCorrelation(
)  # Get EOFs as correlation b/w PCs & orig data
PCs = solver.pcs(pcscaling=1)  # Get PCs
# Get variance explained (in percent, rounded to one decimal) and # of PCs
# NOTE(review): cumSUM is defined outside the visible lines; presumably it
# returns how many PCs are needed to reach 90 % -- confirm.
VarExplain = np.round(solver.varianceFraction() * 100, 1)
numPCs2Keep = cumSUM(VarExplain, 90)
# Calculate EOFs
EOF1 = EOFs[0, :] * np.sqrt(eigenvalues[0])  # Get EOF 1 & scale it
EOF2 = EOFs[1, :] * np.sqrt(eigenvalues[1])  # Get EOF 2 & scale it
EOF1_reg = EOFs_reg[0, :]
EOF2_reg = EOFs_reg[1, :]
stdPC1 = PCs[:, 0]  # first principal component (already scaled by pcscaling=1)
def PCA_Analyze(name, framestart, framestop, destination, numberpca=None):
    """Completes unweighted and unscaled Principal Component Analysis on data.

    **Arguments:**

    *name*
        Glob pattern (as a string) matching the ``.npz`` files to analyse,
        relative to the directory the program runs from. Each file must
        contain the arrays ``UU`` and ``VV``. Use an asterisk (*) in place of
        the frame number, which is taken to be the last run of digits in each
        matched path.

    *framestart*
        The first frame position in the (numerically sorted) sequence of
        frames to be analyzed.

    *framestop*
        The frame position at which to stop; as with a Python slice, this
        position itself is not included.

    *destination*
        File location for the graphs to be saved. Must contain one
        ``%``-style placeholder (e.g. ``%d``), which is filled with the mode
        index so every mode gets its own file.
        Example: 'out/Vertical Velocity/arrays/another/graph_%d.png'

    **Optional keyword arguments:**

    *numberpca*
        Number of valid eigenvalues/PCAs to be calculated. Automatically set
        to determine all of them.

    **Example:**

        PCA_Analyze('../out/velocity_*.npz', 0, 5, '../out/mvavgtur_%d.png', numberpca=4)
    """
    #####Extracts numpy array data and puts it into dictionaries keyed by frame number#####
    UU = {}
    VV = {}
    for np_name in glob.glob(name):
        with np.load(np_name) as data:
            frame = re.findall(r'\d+', np_name)[-1]
            UU[frame] = data['UU']
            VV[frame] = data['VV']

    #####Orders the frames and stacks them into numpy arrays.#####
    # Keys are digit strings: sort them numerically so that e.g. frame "10"
    # comes after frame "2". sorted() also works on Python 3, where
    # dict.keys() has no .sort() method.
    uframes = sorted(UU, key=int)
    vframes = sorted(VV, key=int)
    luu = np.asarray([UU[frame] for frame in uframes])
    lvv = np.asarray([VV[frame] for frame in vframes])

    #####Puts the U and V components into one complex array with the U as the
    #####real component and the V as the imaginary#####
    velgrid = luu + (1.j * lvv)

    #####PCA#####
    solver = Eof(velgrid[framestart:framestop, :, :])
    pca = np.array(solver.eofs(neofs=numberpca))
    eigen = np.array(solver.eigenvalues(neigs=numberpca))
    length = eigen.shape[0]
    print(length)

    #####Graphs each PCA#####
    # The plotting grid only depends on the spatial shape, so build it once.
    n_rows, n_cols = pca.shape[1:]
    y, x = np.mgrid[0:n_rows, 0:n_cols]
    for i in range(length):
        u_mode = pca.real[i, :, :]
        v_mode = pca.imag[i, :, :]
        eig = np.array_str(eigen[i])
        plt.figure()
        plt.streamplot(x, y, u_mode * -1., v_mode * -1., cmap='nipy_spectral')
        plt.suptitle("PCA Analysis. Associated Percent Variance: ")
        plt.title(eig, fontsize=10)
        plt.savefig(destination % i)
        plt.close()
# Compute the EOF number `enum` of the control field (fldca) and of the
# perturbed field (fldpa), and draw the control map in the first of four panels.
fldpa = fldpa1
coslat = np.cos(np.deg2rad(lat)).clip(0.0, 1.0)
# Why the square root of cos(lat)? The eofs documentation recommends
# sqrt(cos(lat)) weights for equal-area weighting of the covariance.
wgts = np.sqrt(coslat)[..., np.newaxis]
solverc = Eof(fldca, weights=wgts)
# Request the first `enum` EOFs, as correlation or covariance maps, and keep
# only the last one (index enum - 1).
if docorr:
    eof1c = solverc.eofsAsCorrelation(neofs=enum)
    eof1c = eof1c[enum - 1, ...]
else:
    eof1c = solverc.eofsAsCovariance(neofs=enum)
    eof1c = eof1c[enum - 1, ...]
eof1c = eof1c.squeeze()
eigsc = solverc.eigenvalues()
vexpc = eigsc[enum - 1] / eigsc.sum() * 100  # percent variance explained
fig, axs = plt.subplots(1, 4)
fig.set_size_inches(12, 5)
ax = axs[0]
# NOTE(review): `type=type` passes whatever `type` is bound to here; unless it
# is rebound above the visible lines this is the builtin `type` -- confirm.
cplt.kemmap(eof1c, lat, lon, type=type, title=sim + " control EOF" + str(enum), axis=ax, cmin=cmin, cmax=cmax)
ax.set_ylabel(str(np.round(vexpc)))  # label the panel with the rounded percent variance
# Same analysis for the perturbed field, reusing the latitude weights.
solverp = Eof(fldpa, weights=wgts)
if docorr:
    eof1p = solverp.eofsAsCorrelation(neofs=enum)
    eof1p = eof1p[enum - 1, ...]
else:
    eof1p = solverp.eofsAsCovariance(neofs=enum)
    eof1p = eof1p[enum - 1, ...]
# Store this file's U and V arrays under the frame id taken from fixed
# character positions 29..33 of the path.
# NOTE(review): np_name, UU, VV, lUU and lVV are defined above the visible
# lines; the hard-coded slice assumes a fixed path layout -- confirm.
with np.load(np_name) as data:
    UU[np_name[29:34]] = data['UU']
    VV[np_name[29:34]] = data['VV']
# sorted() returns a list on both Python 2 and Python 3; calling .sort() on
# dict.keys() raises AttributeError on Python 3.
uframes = sorted(UU.keys())
vframes = sorted(VV.keys())
# Append the frames to the lists in sorted-key order.
for i in uframes:
    u = UU[i]
    lUU.append(u)
for i in vframes:
    v = VV[i]
    lVV.append(v)
luu = np.asarray(lUU)
lvv = np.asarray(lVV)
####PCA####
# Combine U (real part) and V (imaginary part) into one complex field.
velgrid = luu + (1.j * lvv)
solver = Eof(velgrid[2118:2358, :, :])  #*# Choose which frames
pca = solver.eofsAsCovariance(neofs=4)  #*# neofs and type of eofs
eigen = solver.eigenvalues(neigs=4)  #*# neigs
####Save new numpy array for later graphing####
np.savez('PCA.npz', pca=pca, eigen=eigen)  #*#
# Second solver, built on temps_anom. `solver`, `alts` and `cartou` come from
# above the visible lines.
solver_anom = Eof(temps_anom)
# One curve per EOF, plotted against `alts`, for `solver` ...
fig = plt.figure()
for i, eo in enumerate(solver.eofs()):
    plt.plot(eo, alts, label=i)
plt.legend()
fig.savefig(cartou + 'eofs_temps.pdf')
# ... and for the anomaly solver.
fig = plt.figure()
for i, eo in enumerate(solver_anom.eofs()):
    plt.plot(eo, alts, label=i)
plt.legend()
fig.savefig(cartou + 'eofs_temps_anom.pdf')
# Bar charts of eigenvalues and variance fractions.
# NOTE(review): np.arange(6) hard-codes six bars; presumably both solvers
# return exactly six modes -- confirm, otherwise plt.bar gets mismatched lengths.
fig = plt.figure()
plt.bar(np.arange(6), solver.eigenvalues())
fig.savefig(cartou + 'eigenvalues_temps.pdf')
fig = plt.figure()
plt.bar(np.arange(6), solver_anom.eigenvalues())
fig.savefig(cartou + 'eigenvalues_temps_anom.pdf')
fig = plt.figure()
plt.bar(np.arange(6), solver.varianceFraction())
fig.savefig(cartou + 'varfrac_temps.pdf')
fig = plt.figure()
plt.bar(np.arange(6), solver_anom.varianceFraction())
fig.savefig(cartou + 'varfrac_temps_anom.pdf')
# New figure for the plot that follows the visible lines.
fig = plt.figure()
except ValueError:
    # NOTE(review): the matching `try` is above the visible lines.
    sys.exit("Exiting: timeseries have different lengths")
if args.normalize:
    # Divide by the standard deviation along axis 1, then put that axis last.
    # NOTE(review): eof_data_std is computed but the division below
    # recomputes np.std instead of reusing it.
    eof_data_std = np.std(eof_data, axis=1)
    eof_data = eof_data.T / np.std(eof_data, axis=1)
else:
    # transpose so time is first dimension
    eof_data = eof_data.T
# Create an EOF solver to do the EOF analysis. No weights.
# First dimension is assumed to be time by the solver... not true if the
# timeseries itself is of interest.
print("Solving for n={} modes".format(args.eof_num))
solver = Eof(eof_data, center=False)
pcs = solver.pcs(npcs=args.eof_num)
eigval = solver.eigenvalues(neigs=args.eof_num)
varfrac = solver.varianceFraction(neigs=args.eof_num)
eofs = solver.eofs(neofs=args.eof_num)
eofcorr = solver.eofsAsCorrelation(neofs=args.eof_num)
eofcov = solver.eofsAsCovariance(neofs=args.eof_num)
"""---------------------------------Report-----------------------------------"""
### Print Select Results to file
# NOTE(review): every print below opens the output file again and never
# closes the handle explicitly; the first call truncates ("w"), the rest append.
outfile = args.outfile + '.txt'
print("EOF Results:", file=open(outfile, "w"))
print("------------", file=open(outfile, "a"))
# NOTE(review): `filename` is read here before the loop below rebinds it, so
# it must already be defined above the visible lines.
print("File path: {}".format("/".join(filename.split('/')[:-1])), file=open(outfile, "a"))
for key, filename in (files.items()):
    print("Files input: {}".format(filename.split('/')[-1]), file=open(outfile, "a"))
weightf = np.repeat(way[:, np.newaxis], len(lon_at), axis=1) # add weighting function (because of the latitude) atemp_era5 = signal.detrend( hgt500_era5, axis=0) # # linearly detrend 500 hPa geopotential height data atemp_era5_pre = np.zeros((nt * ny, nlat, nlon)) for iy in np.arange(ny): atemp_era5_pre[iy * nt:iy * nt + nt] = atemp_era5[iy] * weightf[None, :, :] ### we did not using n-day moving average as some other studies do partly because the original goal for us (Jiacheng & Zhuo) ### is to evaluate GEFS v12 reforecasts where the reforecast length is usually 16 days which made it impossible ### to apply n-day moving average or other time filtering methods. 4 EOFs may already filter some noisy signals ### To be consistent with other reseachers, one can add n-day moving code above. # EOF analysis solver = Eof(atemp_era5_pre, center=True) pcs = solver.pcs() mid_eig = solver.eigenvalues() mid_eofs = solver.eofs() eofs = solver.eofs() ### Print explained variance when using 4 EOFs #var_explained_era5= np.cumsum(mid_eig)/np.sum(np.sum(mid_eig)) #print(var_explained_era5[3]) #0.5316330300316366 reconstruction_era5 = solver.reconstructedField( noef) #Using 4 leading EOFs to reconstruct hgt500 field ### The Kmeans method needs a 2-D data format: number of days x horizontal fields atemp_era5_post = np.zeros((ny * nt, nlat * nlon)) for i in np.arange(ny * nt): atemp_era5_post[i] = (reconstruction_era5[i]).flatten()