def create_monthly_intvar(run_type, ref_start, ref_end, n_pcs=22, run_n=400):
    """Build a synthetic monthly field from AR-modelled principal components.

    The HadISST monthly residuals are decomposed with ``Eof``, an ``ARN``
    model is fitted to each of the first ``n_pcs`` principal components, a
    predicted series of ``12 * (rcp_ey - histo_sy) + 12`` months is generated
    for every component, and the result is passed to ``reconstruct_field``.

    :param run_type: not used by this function; kept for the callers
    :param ref_start: not used by this function; kept for the callers
    :param ref_end: not used by this function; kept for the callers
    :param n_pcs: number of leading PCs / EOFs that are modelled
    :param run_n: run number used to build the residuals file name
    :return: whatever ``reconstruct_field`` returns for the predicted PCs
    """
    # period covered by the residuals file
    histo_sy = 1899
    histo_ey = 2010

    # The PCs and EOFs are recomputed from the residuals here rather than
    # loaded from the pre-computed PC / EOF files.
    monthly_residuals_fname = get_HadISST_monthly_residuals_fname(histo_sy, histo_ey, run_n)
    fh = netcdf_file(monthly_residuals_fname, 'r')
    try:
        var = fh.variables["sst"]
        mv = var._attributes["_FillValue"]
        # masked_equal copies its input by default, so the masked array does
        # not depend on the file staying open
        monthly_residuals = numpy.ma.masked_equal(var[:], mv)
    finally:
        fh.close()

    # weights for reconstruction / projection: sqrt(cos(latitude)).
    # The clip has to act on the cosine, not on the angle in radians.
    lats = numpy.arange(89.5, -90.5, -1)
    coslat = numpy.cos(numpy.deg2rad(lats)).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]

    eof_solver = Eof(monthly_residuals, center=False, weights=wgts)
    monthly_pcs = eof_solver.pcs(npcs=n_pcs)
    monthly_eofs = eof_solver.eofs(neofs=n_pcs)

    # scale factor: inverse of the variance fraction explained by the
    # retained modes
    M = 1.0 / numpy.sum(eof_solver.varianceFraction(neigs=n_pcs))

    # number of months to predict the PCs for, and the storage for them
    histo_sy, histo_ey, rcp_sy, rcp_ey = get_start_end_periods()
    n_mnths = 12 * (rcp_ey - histo_sy)
    predicted_pcs = numpy.zeros([n_mnths + 12, n_pcs], "f")

    # fit an AR process to each retained PC and generate a predicted series
    for pc in range(0, n_pcs):
        arn = ARN(monthly_pcs[:, pc].squeeze())
        res = arn.fit()
        arp = res.k_ar
        predicted_pcs[:, pc] = M * arn.predict(res.params, noise='all', dynamic=True,
                                               start=arp, end=n_mnths + arp + 11)

    # reconstruct the field from the predicted PCs and the EOFs
    monthly_intvar = reconstruct_field(predicted_pcs, monthly_eofs[:n_pcs], n_pcs, wgts)
    return monthly_intvar
def smooth(variable, window):
    """Return a centred running mean of a one-dimensional time series.

    The data are convolved with a uniform kernel of ``window`` points in
    ``'valid'`` mode, and the time axis is trimmed by ``window // 2`` points
    at each end so it stays aligned with the smoothed data.

    :param variable: object exposing ``shape``, ``dts``, ``data`` and
        ``metadata``
    :param window: odd number of points in the running mean
    :return: a new ``Variable`` holding the smoothed data on the trimmed
        time axis
    :raises NotImplementedError: if ``variable`` has more than one dimension,
        has no usable ``dts`` attribute, or ``window`` is even
    """
    from axis import Axes, TimeAxis
    from variable import Variable

    if len(variable.shape) > 1:
        raise NotImplementedError
    try:
        dts = variable.dts
    except Exception:
        raise NotImplementedError
    if window % 2 == 0:
        raise NotImplementedError

    kernel = np.ones(window) / float(window)
    half = int(window / 2)
    newAxes = Axes()
    # an explicit end index keeps the whole axis when half == 0
    # (dts[0:-0] would be empty)
    newAxes['time'] = TimeAxis(dts[half:len(dts) - half])
    return Variable(
        data=np.convolve(variable.data, kernel, mode='valid'),
        axes=newAxes,
        metadata=variable.metadata)
def eof(variable):
    """Compute the leading EOF of ``variable`` and print its variance share.

    The data are weighted by sqrt(cos(latitude)) before the analysis. The
    EOF is requested with ``eofscaling=2`` (in the ``eofs`` package: EOFs
    multiplied by the square root of their eigenvalue).

    :param variable: object exposing ``lats`` (degrees), ``data`` and
        indexing; ``variable[0].empty()`` provides the output container
    :return: the container from ``variable[0].empty()`` with its ``data``
        set to the first EOF
    """
    from eofs.standard import Eof

    # the trailing axis added by [:, None] lets the latitude weights
    # broadcast over the last data dimension
    wgts = np.cos(variable.lats * np.pi / 180) ** 0.5
    solver = Eof(variable.data, weights=wgts[:, None])
    eof1 = solver.eofs(eofscaling=2, neofs=1)
    # percentage of variance explained by the first mode; this form prints
    # the same text on Python 2 and Python 3
    print('%s %%' % (solver.varianceFraction(neigs=1)[0] * 100))
    output = variable[0].empty()
    output.data = eof1[0]
    return output
def eof_computation(var, varunits, lat, lon):
    """Run a latitude-weighted EOF analysis and return scaled and raw modes.

    :param var: data array; the weights are built to broadcast against a
        (ntime, nlat, nlon) layout
    :param varunits: not used by this function
    :param lat: latitudes in degrees, one per row of the spatial grid
    :param lon: not used by this function
    :return: tuple ``(solver, pcs_scal1, eofs_scal2, pcs_unscal0,
        eofs_unscal0, varfrac)``: the solver, PCs with ``pcscaling=1``,
        EOFs with ``eofscaling=2``, unscaled PCs, unscaled EOFs, and the
        variance fraction of every mode
    """
    print( '____________________________________________________________________________________________________________________' )
    print('Computing the EOFs and PCs')

    # sqrt(cos(lat)) weights; the extra length-1 axis at the end makes the
    # latitude weights broadcastable against (ntime, nlat, nlon) data
    weights_array = np.sqrt(np.cos(np.deg2rad(lat)))[:, np.newaxis]

    start = datetime.datetime.now()
    solver = Eof(var, weights=weights_array)
    end = datetime.datetime.now()
    # report a plain number of seconds rather than a formatted timedelta
    print('EOF computation took me %s seconds' % (end - start).total_seconds())

    # variance fraction of all modes
    varfrac = solver.varianceFraction()

    # unscaled PCs and EOFs (scaling option 0)
    pcs_unscal0 = solver.pcs()
    eofs_unscal0 = solver.eofs()

    # PCs with scaling option 1 and EOFs with scaling option 2
    pcs_scal1 = solver.pcs(pcscaling=1)
    eofs_scal2 = solver.eofs(eofscaling=2)

    return solver, pcs_scal1, eofs_scal2, pcs_unscal0, eofs_unscal0, varfrac
def calculate_EAsia_rm_eofs(data, lats, lons, lat_min=20, lat_max=50, lon_min=110, lon_max=180):
    """Compute EOFs over a latitude/longitude box and regress the first three
    principal components back onto the full input data.

    The box defaults to 20-50 degrees latitude and 110-180 degrees longitude.
    Returns the variance fractions of all modes followed by the
    (regression map, p-value) pair for PC1, PC2 and PC3, in that order.
    """
    in_lat = (lats >= lat_min) & (lats <= lat_max)
    in_lon = (lons >= lon_min) & (lons <= lon_max)
    regional = data[:, in_lat, :][:, :, in_lon]

    # sqrt(cos(lat)) weights for the selected latitudes
    weights = np.sqrt(np.cos(np.deg2rad(lats[in_lat])))[..., np.newaxis]
    solver = Eof(regional, weights=weights)
    var_frac = solver.varianceFraction()
    pcs = solver.pcs(npcs=3, pcscaling=1)

    # regress each of the three leading PCs onto the original (full) data
    outputs = [var_frac]
    for mode in range(3):
        reg, pval = regress_map.regress_map(pcs[:, mode], data, map_type='regress')
        outputs.append(reg)
        outputs.append(pval)
    return tuple(outputs)
def eofs_as(dat):
    """EOF analysis of the 'curl' climatology with land points masked.

    The land mask from ``get_coasts`` is stored in the module-level ``land``
    and applied at every time step. ``get_eddof`` is evaluated for every
    unmasked grid point, and the integer part of the mean of those values is
    passed to the solver as ``ddof``.

    Returns ``(eof, pc, varfrac, x, y, edof)``: the first 10 EOFs
    (``eofscaling=2``), the first 10 PCs (``pcscaling=1``), the variance
    fractions, the lon/lat mesh grids, and the per-point ``get_eddof`` map
    (NaN where masked).
    """
    global land
    curl_clim = climatologia_xarray(dat['curl']).values
    _east, _west, land = get_coasts(dat.lat, dat.lon)

    # repeat the 2-D land mask along the leading axis
    land_3d = np.empty(np.shape(curl_clim))
    land_3d[:] = land
    masked_curl = np.ma.array(curl_clim, mask=land_3d)

    from get_eddof import get_eddof
    n_lat = len(dat.lat)
    n_lon = len(dat.lon)
    edof = np.empty([n_lat, n_lon])
    for row, col in np.ndindex(n_lat, n_lon):
        if not land_3d[0, row, col]:
            edof[row, col] = get_eddof(masked_curl[:, row, col])
        else:
            edof[row, col] = np.nan
    dof = int(np.nanmean(edof))

    # sqrt(cos(lat)) weights, cosine clipped to [0, 1]
    wgts = np.sqrt(np.cos(np.deg2rad(dat.lat.values)).clip(0., 1.))[..., np.newaxis]
    solver = Eof(masked_curl, center=True, weights=wgts, ddof=dof)
    eof = solver.eofs(neofs=10, eofscaling=2)
    pc = solver.pcs(npcs=10, pcscaling=1)
    varfrac = solver.varianceFraction()
    solver.eigenvalues()  # call retained from the original; the result is not used

    x, y = np.meshgrid(dat.lon, dat.lat)
    return eof, pc, varfrac, x, y, edof
def eof(self, no_of_eofs=1):
    """Compute the leading EOFs of ``self._data`` as correlation maps.

    The data are weighted by sqrt(cos(latitude)) using ``self._lat``.

    :param no_of_eofs: number of leading modes to retrieve
    :return: ``Eof_pattern`` built from the correlation maps, the variance
        fraction of each retrieved mode, ``self._lat`` and ``self._lon``
    """
    # sqrt(cos(lat)) weights; the cosine is clipped to [0, 1] so the square
    # root is always defined
    coslat = np.cos(np.deg2rad(self._lat)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(self._data, weights=wgts)
    # EOFs expressed as the correlation between each PC time series and the
    # input data at each grid point
    patterns = solver.eofsAsCorrelation(neofs=no_of_eofs)
    fraction = solver.varianceFraction(neigs=no_of_eofs)
    return Eof_pattern(patterns, fraction, self._lat, self._lon)
def eof(self, no_of_eofs=1):
    """Compute the leading EOFs of ``self._data`` as correlation maps.

    The data are weighted by sqrt(cos(latitude)) using ``self._lat``.

    :param no_of_eofs: number of leading modes to retrieve
    :return: ``Eof_pattern`` built from the correlation maps, the variance
        fraction of each retrieved mode, ``self._lat`` and ``self._lon``
    """
    # sqrt(cos(lat)) weights; the cosine is clipped to [0, 1] so the square
    # root is always defined
    coslat = np.cos(np.deg2rad(self._lat)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(self._data, weights=wgts)
    # EOFs expressed as the correlation between each PC time series and the
    # input data at each grid point
    patterns = solver.eofsAsCorrelation(neofs=no_of_eofs)
    fraction = solver.varianceFraction(neigs=no_of_eofs)
    return Eof_pattern(patterns, fraction, self._lat, self._lon)
def create_monthly_intvar(run_type, ref_start, ref_end, n_pcs=22, run_n=400):
    """Build a synthetic monthly field from AR-modelled principal components.

    The HadISST monthly residuals are decomposed with ``Eof``, an ``ARN``
    model is fitted to each of the first ``n_pcs`` principal components, a
    predicted series of ``12 * (rcp_ey - histo_sy) + 12`` months is generated
    for every component, and the result is passed to ``reconstruct_field``.

    :param run_type: not used by this function; kept for the callers
    :param ref_start: not used by this function; kept for the callers
    :param ref_end: not used by this function; kept for the callers
    :param n_pcs: number of leading PCs / EOFs that are modelled
    :param run_n: run number used to build the residuals file name
    :return: whatever ``reconstruct_field`` returns for the predicted PCs
    """
    # period covered by the residuals file
    histo_sy = 1899
    histo_ey = 2010

    # The PCs and EOFs are recomputed from the residuals here rather than
    # loaded from the pre-computed PC / EOF files.
    monthly_residuals_fname = get_HadISST_monthly_residuals_fname(
        histo_sy, histo_ey, run_n)
    fh = netcdf_file(monthly_residuals_fname, 'r')
    try:
        var = fh.variables["sst"]
        mv = var._attributes["_FillValue"]
        # masked_equal copies its input by default, so the masked array does
        # not depend on the file staying open
        monthly_residuals = numpy.ma.masked_equal(var[:], mv)
    finally:
        fh.close()

    # weights for reconstruction / projection: sqrt(cos(latitude)).
    # The clip has to act on the cosine, not on the angle in radians.
    lats = numpy.arange(89.5, -90.5, -1)
    coslat = numpy.cos(numpy.deg2rad(lats)).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]

    eof_solver = Eof(monthly_residuals, center=False, weights=wgts)
    monthly_pcs = eof_solver.pcs(npcs=n_pcs)
    monthly_eofs = eof_solver.eofs(neofs=n_pcs)

    # scale factor: inverse of the variance fraction explained by the
    # retained modes
    M = 1.0 / numpy.sum(eof_solver.varianceFraction(neigs=n_pcs))

    # number of months to predict the PCs for, and the storage for them
    histo_sy, histo_ey, rcp_sy, rcp_ey = get_start_end_periods()
    n_mnths = 12 * (rcp_ey - histo_sy)
    predicted_pcs = numpy.zeros([n_mnths + 12, n_pcs], "f")

    # fit an AR process to each retained PC and generate a predicted series
    for pc in range(0, n_pcs):
        arn = ARN(monthly_pcs[:, pc].squeeze())
        res = arn.fit()
        arp = res.k_ar
        predicted_pcs[:, pc] = M * arn.predict(res.params,
                                               noise='all',
                                               dynamic=True,
                                               start=arp,
                                               end=n_mnths + arp + 11)

    # reconstruct the field from the predicted PCs and the EOFs
    monthly_intvar = reconstruct_field(predicted_pcs, monthly_eofs[:n_pcs],
                                       n_pcs, wgts)
    return monthly_intvar
def eof(in_bands):
    """Return the leading EOF, its PC and its variance fraction for a
    sequence of bands.

    The ``data`` of every element of ``in_bands`` is stacked along a new
    leading axis, which the solver uses as its first dimension. The leading
    EOF is returned inside a copy of the first band.

    Returns ``(cube, pc1, var_frac)``: the copy of ``in_bands[0]`` carrying
    the first EOF as its data, the first PC (``pcscaling=1``), and the
    variance fraction of the first mode.
    """
    stacked = np.array([band.data for band in in_bands])
    solver = Eof(stacked)

    leading_eof = solver.eofs(neofs=1)[0, :]
    cube = in_bands[0].copy()
    cube.data = leading_eof

    pc1 = solver.pcs(pcscaling=1, npcs=1)[:, 0]
    var_frac = solver.varianceFraction(neigs=1)[0]
    return cube, pc1, var_frac
def calculate_U_EOF(U, SST, THF, lats_ua, lons_ua, lats_SST, lons_SST, lats_THF, lons_THF,
                    lat_min=lat_min, lat_max=lat_max, lon_min=lon_min, lon_max=lon_max, npcs=3):
    """Compute the leading PCs of U over a region and map them onto U, SST
    and THF.

    The region is selected on the U grid from the lat/lon bounds, the EOF
    analysis uses sqrt(cos(lat)) weights, and every PC (``pcscaling=1``) is
    passed with each full field to ``regress_map`` using ``map_type='corr'``;
    the first element of its return value is stored.

    Returns ``(pcs, regress_U, regress_SST, regress_THF,
    variance_fraction[:npcs], U_climatology)``, where ``U_climatology`` is
    the mean of ``U`` over its first axis.
    """
    # select region
    in_lat = (lats_ua >= lat_min) & (lats_ua <= lat_max)
    in_lon = (lons_ua >= lon_min) & (lons_ua <= lon_max)
    U_region = U[:, in_lat, :][:, :, in_lon]
    U_climatology = np.mean(U, axis=0)

    # EOF analysis of the regional wind
    wgts = np.sqrt(np.cos(np.deg2rad(lats_ua[in_lat])))[..., np.newaxis]
    solver = Eof(U_region, weights=wgts)
    pcs = solver.pcs(npcs=npcs, pcscaling=1)
    variance_fraction = solver.varianceFraction()

    # one output map per PC for each of the three fields
    regress_U = np.zeros([npcs, lats_ua.shape[0], lons_ua.shape[0]])
    regress_SST = np.zeros([npcs, lats_SST.shape[0], lons_SST.shape[0]])
    regress_THF = np.zeros([npcs, lats_THF.shape[0], lons_THF.shape[0]])
    targets = ((regress_U, U), (regress_SST, SST), (regress_THF, THF))
    for k in range(npcs):
        series = pcs[:, k]
        for out, field in targets:
            out[k, :, :] = regress_map(series, field, map_type='corr')[0]

    return pcs, regress_U, regress_SST, regress_THF, variance_fraction[:npcs], U_climatology
def calc_HadISST_monthly_residual_EOFs(histo_sy, histo_ey, ref_start, ref_end, run_n, n_eofs=22):
    """Compute and save EOFs, PCs and eigenvalues of the monthly residuals.

    The residuals are read from the file named by
    ``get_HadISST_monthly_residuals_fname``, values below -1000 are masked,
    and a centred, sqrt(cos(lat))-weighted EOF analysis is run. The first
    ``n_eofs`` EOFs, PCs and eigenvalues are written with ``save_3d_file``,
    ``save_pcs`` and ``save_eigenvalues``.

    :param histo_sy: start year, used to build the file names
    :param histo_ey: end year, used to build the file names
    :param ref_start: not used by this function
    :param ref_end: not used by this function
    :param run_n: run number, used to build the file names
    :param n_eofs: number of modes to compute and save
    :return: None
    """
    # load the already calculated residuals; the annual cycle does not have
    # to be subtracted any more as the residuals are with respect to a
    # smoothed version of the monthly ssts
    resid_fname = get_HadISST_monthly_residuals_fname(histo_sy, histo_ey, run_n)
    resid_mon_fh = netcdf_file(resid_fname, 'r')
    try:
        sst_var = resid_mon_fh.variables["sst"]
        lats_var = resid_mon_fh.variables["latitude"]
        lons_var = resid_mon_fh.variables["longitude"]
        attrs = sst_var._attributes
        ssts = numpy.array(sst_var[:])
        sst_resids = numpy.ma.masked_less(ssts, -1000)

        # calculate the EOFs and PCs
        coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
        wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
        eof_solver = Eof(sst_resids, center=True, weights=wgts)
        pcs = eof_solver.pcs(npcs=n_eofs)
        eofs = eof_solver.eofs(neofs=n_eofs)
        evs = eof_solver.eigenvalues(neigs=n_eofs)
        # the eigenvalues are saved as a single row
        evs = evs.reshape([1, evs.shape[0]])
        print(evs.shape)

        # get the output names
        out_eofs_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
        out_pcs_fname = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
        out_evs_fname = get_HadISST_monthly_residual_EVs_fname(histo_sy, histo_ey, run_n)

        # save the eofs, pcs and eigenvalues; the PCs get a length-1 middle axis
        save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
        out_pcs = pcs.reshape([pcs.shape[0], 1, pcs.shape[1]])
        save_pcs(out_pcs_fname, out_pcs, attrs)
        save_eigenvalues(out_evs_fname, evs, attrs)
    finally:
        # close the input file even if the analysis or a save step fails
        resid_mon_fh.close()
def PCA(self, field_name):
    """Run a PCA on the saved data of ``field_name`` and store the results.

    Loads ``<directory_data>/<field_name>_data_<start>_to_<end>.npy``,
    subtracts the mean over the first axis, divides by the overall standard
    deviation, and saves the PCs, EOFs, cumulative variance fraction, mean
    and standard deviation as ``.npy`` files in the same directory.
    """
    period_tag = 'data_' + str(self.start_pca) + '_to_' + str(self.end_pca)
    folder = self.directory_data + '/'
    suffix = field_name + '_' + period_tag

    modelData = np.load(folder + suffix + '.npy')

    # fields whose name contains 'Velocity' are flattened to 2-D
    # (Fortran order) before the PCA
    if 'Velocity' in field_name:
        n_samples = modelData.shape[0]
        n_features = modelData.shape[1] * modelData.shape[2]
        modelData = np.reshape(modelData, (n_samples, n_features), order='F')

    # centre on the mean over the first axis, scale by the overall std
    meanData = np.nanmean(modelData, 0)
    stdData = np.nanstd(modelData)
    scaled = (modelData - meanData) / stdData

    solver = Eof(scaled)
    pcs = solver.pcs()
    eofs = solver.eofs()
    varianceCumulative = np.cumsum(solver.varianceFraction())

    results = (
        ('pcs_', pcs),
        ('eofs_', eofs),
        ('varCumulative_', varianceCumulative),
        ('mean_', meanData),
        ('std_', stdData),
    )
    for prefix, values in results:
        np.save(folder + prefix + suffix, values)
# Longitude of the grid, read from the 'lon_rho' variable of the grid file.
lon = ncgrid.variables['lon_rho']
print(chlorophyll1.ndim)
print(chlorophyll1.dims)
# Take index 14 along the second axis.
# NOTE(review): presumably the surface level, given the name - confirm.
surfchl = chlorophyll1[:, 14, :, :]
# Anomalies with respect to the mean over the first axis.
chl_mean = surfchl.mean(axis=0)
anomaly = surfchl - chl_mean
# EOF analysis of the anomalies (no weights are passed).
solver = Eof(anomaly)
eof1 = solver.eofsAsCorrelation(neofs=1)
pc1 = solver.pcs(npcs=1, pcscaling=1)
# Map of the first EOF expressed as a correlation.
plt.pcolormesh(lon, lat, eof1[0], cmap=plt.cm.RdBu_r)
plt.xlabel('Longitude')
plt.ylabel('Latitude')
plt.title('EOF1 expressed as Correlation')
cbar = plt.colorbar()
cbar.set_label('Correlation Coefficient', rotation=270)
plt.show()
# Time series of the first PC (requested with pcscaling=1).
plt.plot(timearray, pc1[:, 0])
plt.xlabel('Year')
plt.ylabel('Normalized Units')
plt.title('PC1 Time Series')
plt.show()
# Scree plot: percentage of variance explained by the first six modes.
vF1 = solver.varianceFraction(neigs=6)
percentarray = vF1 * 100
array1 = [1, 2, 3, 4, 5, 6]
plt.bar(array1, percentarray)
plt.title('Scree Plot')
plt.xlabel('Mode')
plt.ylabel('Percent of Variance Explained')
plt.show()
def calcSeasonalEOF(anomsit, years, year1, year2, monthind, eoftype, pctype):
    """
    Calculates EOF over defined seasonal period

    Parameters
    ----------
    anomsit : 4d array [year,month,lat,lon]
        sea ice thickness anomalies
    years : 1d array
        years in total
    year1 : number
        first year to keep (inclusive); if year1 or year2 is not finite
        the entire time series is used
    year2 : number
        last year to keep (inclusive)
    monthind : 1d array
        indices for months to be averaged into the seasonal mean
    eoftype : integer
        number of EOFs (and variance fractions) to return
    pctype : integer
        number of principal components to return

    Returns
    -------
    eof : array
        EOFs expressed as covariance with the PCs
    pc : array
        principal components (pcscaling=1)
    var : array
        variance fraction of the first ``eoftype`` modes

    Notes
    -----
    The latitudes used for the weights are read from the module-level
    name ``lats``; they are not passed in.
    """
    print('\n>>> Using calcSeasonalEOF function!')

    ### Slice years
    if np.isfinite(year1) and np.isfinite(year2):
        # index the year axis with a plain 1-D index array so that no
        # squeeze is needed and singleton dimensions (e.g. a single
        # selected year) are preserved
        yearqq = np.where((years >= year1) & (years <= year2))[0]
        anomsit = anomsit[yearqq, :, :, :]
    else:
        print('Using entire time series for this EOF!')
    print('Sliced time period for seasonal mean!')

    ### Average over months
    anomsit = anomsit[:, monthind, :, :]
    anomsit = np.nanmean(anomsit, axis=1)
    print('Sliced month period for seasonal mean!')

    ### Calculate EOF
    # Square-root of cosine of latitude weights are applied before the
    # computation of EOFs.
    coslat = np.cos(np.deg2rad(lats)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(anomsit, weights=wgts)

    # EOFs expressed as the covariance between the PC time series and the
    # input anomalies at each grid point.
    eof = solver.eofsAsCovariance(neofs=eoftype)
    pc = solver.pcs(npcs=pctype, pcscaling=1)
    var = solver.varianceFraction(neigs=eoftype)
    print('EOF and PC computed!')

    print('*Completed: EOF and PC Calculated!\n')
    return eof, pc, var
def calculate_correlations_and_pvalues(var_pairs, label_to_vname_to_season_to_yearlydata: dict, season_to_months: dict,
                                       region_of_interest_mask, lakes_mask=None, lats=None) -> dict:
    """
    Correlate, point by point and across years, the two variables of every pair
    for each label and season.

    :param var_pairs: iterable of 2-element (vname1, vname2) sequences
    :param label_to_vname_to_season_to_yearlydata: nested mapping
        label -> vname -> season -> {year: field}
    :param season_to_months: mapping whose keys are the seasons to process
        (the values are not read here)
    :param region_of_interest_mask: boolean array; correlations are computed where it is True
        and the results are masked where it is False
    :param lakes_mask: mask of lake points, read only for the
        ("hles_snow", "lake_ice_fraction") pair
    :param lats needed for weighting of eof solver
    :return: {(vname1, vname2): {label: {season: [corr, pvalue]}}}}
    """
    res = {}
    for pair in var_pairs:
        pair = tuple(pair)
        res[pair] = {}

        for label in label_to_vname_to_season_to_yearlydata:
            res[pair][label] = {}

            for season in season_to_months:
                # years are taken from the first variable of the pair
                years_sorted = sorted(label_to_vname_to_season_to_yearlydata[label][pair[0]][season])

                v1_dict, v2_dict = [label_to_vname_to_season_to_yearlydata[label][pair[vi]][season] for vi in range(2)]
                v1 = np.array([v1_dict[y] for y in years_sorted])
                v2 = np.array([v2_dict[y] for y in years_sorted])

                # flat result arrays: correlation defaults to 0, p-value to 1
                r = np.zeros(v1.shape[1:]).flatten()
                p = np.ones_like(r).flatten()

                # flatten the spatial dimensions: (nyears, npoints)
                v1 = v1.reshape((v1.shape[0], -1))
                v2 = v2.reshape((v2.shape[0], -1))

                # for hles and ice fraction get the eof of the ice and correlate
                if pair == ("hles_snow", "lake_ice_fraction"):
                    # assume that v2 is the lake_ice_fraction
                    v_lake_ice = v2

                    positions_hles_region = np.where(region_of_interest_mask.flatten())[0]
                    positions_lakes = np.where(lakes_mask.flatten())[0]

                    v_lake_ice = v_lake_ice[:, positions_lakes]
                    # calculate anomalies
                    v_lake_ice = v_lake_ice - v_lake_ice.mean(axis=0)

                    # sqrt(cos(lat)) weights at the lake points
                    weights = np.cos(np.deg2rad(lats.flatten()[positions_lakes])) ** 0.5

                    solver = Eof(v_lake_ice, weights=weights[..., np.newaxis])
                    print(label, solver.varianceFraction(neigs=10))

                    # NOTE(review): the original comment says the module of PC1 is used,
                    # but no absolute value is taken here - confirm the intent
                    pc1_ice = solver.pcs(npcs=1)[:, 0]

                    # debug: plot eof
                    eofs = solver.eofs(neofs=1)
                    eof_2d = np.zeros_like(lats).flatten()
                    eof_2d[positions_lakes] = eofs[:, 0] * pc1_ice
                    eof_2d = eof_2d.reshape(lats.shape)

                    plt.figure()
                    im = plt.pcolormesh(eof_2d.T)
                    plt.colorbar(im)
                    plt.show()

                    # NOTE(review): unconditional debug stop - as written this branch always
                    # raises, so the correlation loop below is never reached
                    if True:
                        raise Exception

                    # print(positions)
                    for i in positions_hles_region:
                        r[i], p[i] = pearsonr(v1[:, i], pc1_ice)
                else:
                    positions = np.where(region_of_interest_mask.flatten())

                    # print(positions)
                    for i in positions[0]:
                        r[i], p[i] = pearsonr(v1[:, i], v2[:, i])

                # back to the shape of the mask, hiding points outside the region of interest
                r.shape = region_of_interest_mask.shape
                p.shape = region_of_interest_mask.shape

                r = np.ma.masked_where(~region_of_interest_mask, r)
                p = np.ma.masked_where(~region_of_interest_mask, p)

                res[pair][label][season] = [r, p]

    return res
sys.exit("Exiting: timeseries have different lengths") if args.normalize: eof_data_std = np.std(eof_data, axis=1) eof_data = eof_data.T / np.std(eof_data, axis=1) else: #transpose so time is first dimension eof_data = eof_data.T # Crete an EOF solver to do the EOF analysis. No weights # First dimension is assumed time by program... not true if timseries is of interest, print("Solving for n={} modes".format(args.eof_num)) solver = Eof(eof_data, center=False) pcs = solver.pcs(npcs=args.eof_num) eigval = solver.eigenvalues(neigs=args.eof_num) varfrac = solver.varianceFraction(neigs=args.eof_num) eofs = solver.eofs(neofs=args.eof_num) eofcorr = solver.eofsAsCorrelation(neofs=args.eof_num) eofcov = solver.eofsAsCovariance(neofs=args.eof_num) """---------------------------------Report-----------------------------------""" ### Print Select Results to file outfile = args.outfile + '.txt' print("EOF Results:", file=open(outfile, "w")) print("------------", file=open(outfile, "a")) print("File path: {}".format("/".join(filename.split('/')[:-1])), file=open(outfile, "a")) for key, filename in (files.items()): print("Files input: {}".format(filename.split('/')[-1]), file=open(outfile, "a"))
fig = plt.figure() for i, eo in enumerate(solver_anom.eofs()): plt.plot(eo, alts, label=i) plt.legend() fig.savefig(cartou + 'eofs_temps_anom.pdf') fig = plt.figure() plt.bar(np.arange(6), solver.eigenvalues()) fig.savefig(cartou + 'eigenvalues_temps.pdf') fig = plt.figure() plt.bar(np.arange(6), solver_anom.eigenvalues()) fig.savefig(cartou + 'eigenvalues_temps_anom.pdf') fig = plt.figure() plt.bar(np.arange(6), solver.varianceFraction()) fig.savefig(cartou + 'varfrac_temps.pdf') fig = plt.figure() plt.bar(np.arange(6), solver_anom.varianceFraction()) fig.savefig(cartou + 'varfrac_temps_anom.pdf') fig = plt.figure() atm_mean = np.mean(temps, axis=0) for i, pc in enumerate(solver.pcs()[:, 0]): plt.plot(atm_mean + pc * solver.eofs()[0] - temps[i, :], alts) fig.savefig(cartou + 'residual_temps_firstpc.pdf') fig = plt.figure() atm_mean = np.mean(temps, axis=0) for i, pc in enumerate(solver_anom.pcs()[:, 0]):
print(type(covmaps))

# Show the first two covariance maps on a common colour range of [-1, 1].
plt.figure()
plt.subplot(211)
cs = plt.imshow(covmaps[0], cmap=plt.cm.RdBu_r)
cs.set_clim(-1, 1)
cb = plt.colorbar(cs)
plt.subplot(212)
cs = plt.imshow(covmaps[1], cmap=plt.cm.RdBu_r)
cs.set_clim(-1, 1)
cb = plt.colorbar(cs)
# -

# Then, we can recover the explained variance (in percent):

eofvar = solver.varianceFraction(neigs=neofs) * 100
eofvar

# Finally, we can obtain the principal components. To obtain normalized time-series, the `pcscaling` argument must be equal to 1.

# The result is transposed so that each row is one principal component.
pcs = solver.pcs(pcscaling=1, npcs=neofs).T
plt.figure()
plt.plot(pcs[0], label='pc1')
plt.plot(pcs[1], label='pc2')
leg = plt.legend()

# ## EOF computation (xarray mode)
#
# In order to have EOF as an `xarray` with all its features, the Eof method of the `eofs.xarray` submodule must be used.

from eofs.xarray import Eof
#decoded_imgs = test_data # row/test data

##### eof
# Latitudes from -90 to 90 in 1-degree steps and their sqrt(cos(lat))
# weights; the trailing axis lets the weights broadcast over longitude.
lat_array = np.arange(-90, 91)
cos_lat = np.cos(np.deg2rad(lat_array))
weighted_lat = np.sqrt(cos_lat)[:, np.newaxis]

# solver: the images are reshaped to (ntime, lat, lon) first
ntime = decoded_imgs.shape[0]
solver = Eof(decoded_imgs.reshape(ntime, lat, lon), weights=weighted_lat)

# leading mode: EOF as correlation map, PC (pcscaling=1), variance fraction
eof1 = solver.eofsAsCorrelation(neofs=1)
pc1 = solver.pcs(npcs=1, pcscaling=1)
var1 = solver.varianceFraction(neigs=1)
# var1 is a length-1 array: index it explicitly instead of relying on the
# implicit array-to-scalar conversion
print('Variance 1st mode %2.2f' % var1[0])
varall = solver.varianceFraction()
print('Variance all mode', varall )
print("Sum of all mode's variance = ", sum(varall))

# plotting: the longitude axis is simply the column index 0..lon-1
lons = np.arange(0, lon)
lats = lat_array
clevs = np.linspace(-1, 1, 11)
fig = plt.figure(figsize=(12, 8))
ax = plt.axes(projection=ccrs.PlateCarree(central_longitude=0))
fill = ax.contourf(lons, lats, eof1.squeeze(), clevs,
                   transform=ccrs.PlateCarree(), cmap=plt.cm.RdBu_r)
ax.add_feature(cfeature.LAND, facecolor='w', edgecolor='k')
dataset2 = xr.open_dataset(xr.backends.NetCDF4DataStore(b))
# Transpose the first dataset, then standardise it along axis 0
# (subtract the mean, divide by the standard deviation).
sinData = dataset1['data'].T
sinData = (sinData - sinData.mean(axis=0)) / sinData.std(axis=0)
sinData = sinData.values
bullseyeData = dataset2['data']

#%% EOF analysis
solver = Eof(sinData)
eigenvalues = solver.eigenvalues()  # Get eigenvalues
EOFs = solver.eofs(eofscaling=0)  # Get unscaled EOFs
EOFs_reg = solver.eofsAsCorrelation(
)  # Get EOFs as correlation b/w PCs & orig data
PCs = solver.pcs(pcscaling=1)  # Get PCs (pcscaling=1)

# Get variance explained (percent, rounded to one decimal) and # of PCs
# NOTE(review): cumSUM is defined elsewhere; presumably it returns the number
# of modes needed to reach 90 % - confirm.
VarExplain = np.round(solver.varianceFraction() * 100, 1)
numPCs2Keep = cumSUM(VarExplain, 90)

# Calculate EOFs: scale each unscaled EOF by the square root of its eigenvalue
EOF1 = EOFs[0, :] * np.sqrt(eigenvalues[0])  # Get EOF 1 & scale it
EOF2 = EOFs[1, :] * np.sqrt(eigenvalues[1])  # Get EOF 2 & scale it
EOF1_reg = EOFs_reg[0, :]
EOF2_reg = EOFs_reg[1, :]
stdPC1 = PCs[:, 0]
stdPC2 = PCs[:, 1]

# Alt method of getting EOF 1 by regressing PC on data
#EOF1_reg = np.expand_dims(stdPC1,0) @ sinData
#EOF1_reg = (EOF1_reg - np.mean(EOF1_reg))/np.std(EOF1_reg) # Standardize EOF1
# Alt method of getting EOF 1 by regressing PC on data
# Load the regional data and mask the 2.e+20 fill value.
data = load_regional(filenames, ny, nx)
data = np.ma.masked_values(data, 2.e+20)
# All messages use the single-argument print form, which produces the same
# output on Python 2 and Python 3.
print("data loaded %s" % (data.shape,))

# Set up info
plt.set_cmap('RdBu')
neofs = 5
nens = data.shape[0]
nwanted = 57

solver = Eof(data)
print('set up EOF solver')
pcs = solver.pcs(npcs=neofs, pcscaling=1)
eofs = solver.eofs(neofs=neofs)
varfrac = solver.varianceFraction(neigs=neofs)
print('calculated EOFs')

# One map per EOF, labelled with its variance fraction.
print('printing EOFs')
for i in range(neofs):
    print('EOF %s' % i)
    plt.clf()
    plot_region_pnw(eofs[i, :], lat_coord, lon_coord, 0, -1, 0, -1, 'EOF' + str(i), varfrac[i])

# Histograms of the first three PCs.
print("plotting histograms of PCs")
for i in range(3):
    plt.clf()
    # NOTE(review): `normed` was removed from matplotlib's hist in 3.1;
    # switch to density=True once the matplotlib version in use supports it.
    plt.hist(pcs[:, i], 200, range=(-4, 4), normed=1, alpha=0.4, label='pc' + str(i))
    plt.ylim([0, .6])
    plt.savefig(output_dir + '/histogram_pc' + str(i) + '.png')

# Mean and standard deviation over the first axis.
print("plotting mean and stdev of ensemble")
plot_region_pnw(data[:].mean(0), lat_coord, lon_coord, 0, -1, 0, -1, 'mean', data.mean())
plot_region_pnw(data[:].std(0), lat_coord, lon_coord, 0, -1, 0, -1, 'stdev', data.std())
def main():
    """Compute and plot EOFs and PCs of yearly fields for each data source.

    For every labelled directory all ``.nc`` files are read (one year per
    file, first record of ``vname``), an EOF analysis is run on the stack of
    yearly fields, the PCs are written to ``<vname>_<label>_pc.csv``, and the
    first PC of every source is drawn on one figure. The first three EOFs
    (as correlation maps) are then plotted side by side for all sources.
    All figures are saved into the "Obs" directory.
    """
    label_to_hles_dir = OrderedDict([
        ("Obs", Path(
            "/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_Obs_1980-2009"
        )),
        ("CRCM5_NEMO", Path(
            "/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_NEMO_1980-2009"
        )),
        ("CRCM5_HL", Path(
            "/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_Hostetler_1980-2009"
        )),
        # ("CRCM5_NEMO_TT_PR", Path("/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_NEMO_based_on_TT_PR_1980-2009"))
    ])

    label_to_line_style = {
        "Obs": "k.-",
        "CRCM5_NEMO": "r",
        "CRCM5_HL": "b",
        "CRCM5_NEMO_TT_PR": "g"
    }

    vname = "snow_fall"
    units = "cm"
    #vname = "lkeff_snowfall_days"
    #units = "days"
    npc = 1

    b = Basemap(lon_0=180,
                llcrnrlon=common_params.great_lakes_limits.lon_min,
                llcrnrlat=common_params.great_lakes_limits.lat_min,
                urcrnrlon=common_params.great_lakes_limits.lon_max,
                urcrnrlat=common_params.great_lakes_limits.lat_max,
                resolution="i")

    label_to_y_to_snfl = {}
    label_to_pc = {}
    label_to_eof = OrderedDict()
    label_to_varfraction = OrderedDict()

    mask = None

    plot_utils.apply_plot_params(font_size=12)
    fig = plt.figure()

    years = None
    lats = None
    lons = None

    for label, folder in label_to_hles_dir.items():
        y_to_snfl = {}

        for the_file in folder.iterdir():
            if not the_file.name.endswith(".nc"):
                continue

            with Dataset(str(the_file)) as ds:
                print(ds)
                snfl = ds.variables[vname][:]
                year_current = ds.variables["year"][:]

                # coordinates are read once, from the first file encountered
                if mask is None:
                    lons, lats = [ds.variables[k][:] for k in ["lon", "lat"]]
                    lons[lons > 180] -= 360
                    mask = maskoceans(lons, lats, lons, inlands=True, resolution="i")

                y_to_snfl[year_current[0]] = snfl[0]

        years_ord = sorted(y_to_snfl)
        label_to_y_to_snfl[label] = y_to_snfl

        # the years of the first source are used for the x ticks
        if years is None:
            years = years_ord

        data = np.ma.array([y_to_snfl[y] for y in years_ord])

        solver = Eof(data)
        eof = solver.eofsAsCorrelation()
        # eof = solver.eofs(neofs=4)
        pc = solver.pcs(pcscaling=0)
        label_to_varfraction[label] = solver.varianceFraction()

        label_to_pc[label] = pc
        label_to_eof[label] = eof

        # change the signs of pcs and eofs (in place) for the listed sources
        if label not in ["CRCM5_HL"]:
            label_to_pc[label][:, 0] *= -1
            label_to_eof[label][0, :, :] *= -1

        if label in ["CRCM5_NEMO"]:
            label_to_pc[label][:, 1:] *= -1
            label_to_eof[label][1:, :, :] *= -1

        # save data for Diro
        print(pc.shape)
        df = pd.DataFrame(data=pc, index=years_ord)
        df.to_csv("{}_{}_pc.csv".format(vname, label))

        plt.plot(years_ord, label_to_pc[label][:, 0].copy(), label_to_line_style[label],
                 linewidth=2, label=label)

    plt.legend(loc="upper left")
    plt.ylabel(units)
    plt.xlabel("Year")
    plt.xticks(years)
    plt.grid()
    plt.gcf().autofmt_xdate()
    plt.savefig(str(label_to_hles_dir["Obs"].joinpath("pc{}_{}.png".format(npc, vname))),
                bbox_inches="tight")
    plt.close(fig)

    # plot the eofs
    plot_utils.apply_plot_params(font_size=12, width_cm=30, height_cm=6)

    # back to the 0..360 longitude convention before projecting
    lons[lons < 0] += 360
    xx, yy = b(lons, lats)

    for eof_ind in range(3):
        fig = plt.figure()
        gs = GridSpec(1, len(label_to_eof), wspace=0.02)

        for col, (label, eof_field) in enumerate(label_to_eof.items()):
            ax = fig.add_subplot(gs[0, col])
            to_plot = eof_field[eof_ind]
            im = b.pcolormesh(xx, yy, to_plot, cmap=cm.get_cmap("bwr", 10),
                              vmin=-0.25, vmax=0.25, ax=ax)
            cb = b.colorbar(im, extend="both")
            # only the last panel keeps a visible colorbar
            cb.ax.set_visible(col == len(label_to_eof) - 1)
            # the backslash is escaped explicitly; the resulting text is
            # unchanged, but "\s" on its own is an invalid escape sequence
            ax.set_title("{} (explains {:.2f}$\\sigma^2$)".format(
                label, label_to_varfraction[label][eof_ind]))
            b.drawcoastlines(ax=ax)

        # fig.tight_layout()
        plt.savefig(str(label_to_hles_dir["Obs"].joinpath(
            "eof_raw_{}_{}.png".format(eof_ind + 1, vname))),
            bbox_inches="tight", dpi=300)
        plt.close(fig)
import xarray as xr from eofs.standard import Eof import numpy as np f = xr.open_dataset('../data/pre.nc') pre = np.array(f['pre']) lat = f['lat'] lon = f['lon'] pre_lon = lon pre_lat = lat lat = np.array(lat) coslat = np.cos(np.deg2rad(lat)) wgts = np.sqrt(coslat)[..., np.newaxis] solver = Eof(pre, weights=wgts) eof = solver.eofsAsCorrelation(neofs=3) pc = solver.pcs(npcs=3, pcscaling=1) var = solver.varianceFraction() color1 = [] color2 = [] color3 = [] for i in range(1961, 2017): if pc[i - 1961, 0] >= 0: color1.append('red') elif pc[i - 1961, 0] < 0: color1.append('blue') if pc[i - 1961, 1] >= 0: color2.append('red') elif pc[i - 1961, 1] < 0: color2.append('blue') if pc[i - 1961, 2] >= 0: color3.append('red') elif pc[i - 1961, 2] < 0:
# Bathymetry, cut to y in [-56, -20] and x in [-80, -40], and its mesh grid.
bat = xarray.open_dataset(
    '/home/bock/Documents/tesis/batimetria/ETOPO1_Bed_g_gmt4.grd')
bati = bat.sel(y=slice(-56, -20), x=slice(-80, -40))
x_b, y_b = np.meshgrid(bati['x'], bati['y'])

# Monthly climatology of 'curl' (mean over time for each calendar month),
# restricted to lat -20..-40 and lon -64..-22.
dat = xarray.open_dataset(
    '/home/bock/Documents/tesis/datos/ncep2_atlsur_2009_2015.nc')
clim_nc = dat['curl'].groupby('time.month').mean('time').sel(
    lat=slice(-20, -40), lon=slice(-64, -22))

# sqrt(cos(lat)) weights, cosine clipped to [0, 1].
coslat = np.cos(np.deg2rad(clim_nc.lat.values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(clim_nc.values, weights=wgts)

# Bar chart of the percentage of variance explained by every mode, shown
# before the user is asked how many modes to keep.
var = solver.varianceFraction()
plt.figure(1)
plt.bar(np.arange(0, len(var), 1), var * 100)
plt.show()
plt.close()

# Number of modes to extract, typed in by the user.
n = input('Cuantos PC extraer: ')
n = int(n)
eof = solver.eofs(neofs=n, eofscaling=2)
pc = solver.pcs(npcs=n, pcscaling=1)
vf = var[:n]

# Plot the first EOF multiplied by 1e7 (colour bar labelled 10^-7 Pa/m).
fig, ax = plotterchico(clim_nc.lat, clim_nc.lon, eof[0] * 1e7,
                       cm.GMT_no_green, np.arange(-0.5, 0.51, .1), '',
                       '10$^{-7}$ Pa/m')
plt.savefig('/home/bock/Documents/tesis/resultados/figs/eof1_ncep.png',
            bbox_inches='tight')
plt.show()
tnc = camgoda(tfull_path) is3d, var, vname = cnc.ExtractData(variable, box) is3d, var, vname = tnc.ExtractData(variable, box) if n == 0: nlats, nlons = cnc.data.shape boxlat = cnc.boxlat boxlon = cnc.boxlon d = np.zeros(shape=(len(dates), nlats * nlons)) d[n, :] = np.ndarray.flatten(tnc.data - cnc.data) # Compute the amplitude timeseries and EOF spatial distributions of the data array print "Computing the EOF..." EOF = Eof(d, center=removeMeans) eof = EOF.eofs(neofs=num_eofs) pca = EOF.pcs(npcs=num_eofs, pcscaling=1) varfrac = EOF.varianceFraction() print "Finished!" # Reshape F into a spatial grid eof_grid = np.reshape(eof, (eof.shape[0], nlats, nlons)) # Make the maps bmlon, bmlat = np.meshgrid(boxlon, boxlat) southern_lat = boxlat[0] northern_lat = boxlat[-1] left_lon = boxlon[0] right_lon = boxlon[-1] if 0 in boxlon[1:-2]: # if we cross the gml left_lon = boxlon[0] - 360 num_subplots = 2 * num_eofs
# Quarterly means of the observed field, with quarters anchored on lmonth[i].
hgt_erai_seas_mean = hgt_erai['z'].resample(time='QS-' + lmonth[i]).mean(
    dim='time', skipna=True)
# Keep only the quarters that start in month lmonth[i], excluding year 2002.
mes = datetime.datetime.strptime(lmonth[i], '%b').month
hgt_erai_smean = hgt_erai_seas_mean.sel(
    time=np.logical_and(hgt_erai_seas_mean['time.month'] == mes,
                        hgt_erai_seas_mean['time.year'] != 2002))
# Model field averaged over the three leading-axis entries i..i+2.
hgt_s4_smean = np.nanmean(hgt_s4.z.values[i:i + 3, :, :, :], axis=0)

# EOF analysis of the observations
# Compute anomalies by removing the time-mean
z_mean = np.nanmean(hgt_erai_smean.values, axis=0)
z_anom = hgt_erai_smean.values - z_mean
# Create an EOF solver to do the EOF analysis. No latitude weights are
# applied (the weights argument is disabled).
solver = Eof(z_anom)  #, weights=wgts)
eofs = solver.eofsAsCorrelation(neofs=5)
exp_var = solver.varianceFraction()
pcs = solver.pcs(npcs=5, pcscaling=1)
# store the first three PCs for this season
pc_erai[i, :, :] = pcs[:, 0:3]

title = 'Observed HGT 200hPa EOFs - ' + season[i]
filename = FIG_PATH + 'obs_eof_' + season[i] + '.png'
PlotEOF(eofs[0:3, :, :], lat_erai, lon_erai, title, filename)

# the scree plot gets its own title (previously assigned to a misspelt name,
# so the EOF-map title was passed instead)
filename = FIG_PATH + 'obs_scree_' + season[i] + '.png'
title = 'Variance Explained by Observed modes - ' + season[i]
PlotScree(exp_var, 36, title, filename)

# EOF analysis of the model ensemble mean
# Compute anomalies by removing the time-mean
z_mean = np.nanmean(hgt_s4_smean, axis=0)
# compute the ensemble mean: average over the second axis after reshaping
# to [36, 51, 99, 512]
hgt_s4m_smean = np.mean(np.reshape(hgt_s4_smean, [36, 51, 99, 512]), axis=1)
z_anom = hgt_s4m_smean - z_mean