def create_monthly_intvar(run_type, ref_start, ref_end, n_pcs=22, run_n=400):
    """Build a synthetic monthly internal-variability field from modelled PCs.

    The HadISST monthly residuals are decomposed with ``Eof`` (no centring,
    sqrt(cos(lat)) weights), an ``ARN`` model is fitted to each of the leading
    ``n_pcs`` principal components, and the predicted PC series are handed to
    ``reconstruct_field`` together with the EOFs and the weights.

    Args:
        run_type, ref_start, ref_end: Not used by this function; kept so that
            existing callers keep working.
        n_pcs: Number of leading PCs/EOFs to model and reconstruct with.
        run_n: Forwarded to ``get_HadISST_monthly_residuals_fname``.

    Returns:
        The result of ``reconstruct_field`` for the predicted PCs.
    """
    # period used to locate the residuals file
    histo_sy = 1899
    histo_ey = 2010
    monthly_residuals_fname = get_HadISST_monthly_residuals_fname(
        histo_sy, histo_ey, run_n)

    # read the residuals (fill value masked) and always release the file
    fh = netcdf_file(monthly_residuals_fname, 'r')
    try:
        var = fh.variables["sst"]
        mv = var._attributes["_FillValue"]
        # masked_equal copies by default, so the data outlive the file handle
        monthly_residuals = numpy.ma.masked_equal(var[:], mv)
    finally:
        fh.close()

    # sqrt(cos(lat)) weights for latitudes 89.5 .. -89.5 in 1 degree steps.
    # The clip has to act on the cosine; clipping the angle in radians (as the
    # previous code did) flattens every negative latitude to weight 1.
    lat_centres = numpy.arange(89.5, -90.5, -1)
    coslat = numpy.cos(numpy.deg2rad(lat_centres)).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]

    eof_solver = Eof(monthly_residuals, center=False, weights=wgts)
    monthly_pcs = eof_solver.pcs(npcs=n_pcs)
    monthly_eofs = eof_solver.eofs(neofs=n_pcs)

    # scalar: inverse of the variance fraction explained by the retained modes
    M = 1.0 / numpy.sum(eof_solver.varianceFraction(neigs=n_pcs))

    # number of months to predict the PCs for, and the storage for them
    histo_sy, histo_ey, rcp_sy, rcp_ey = get_start_end_periods()
    n_mnths = 12 * (rcp_ey - histo_sy)
    predicted_pcs = numpy.zeros([n_mnths + 12, n_pcs], "f")

    # fit an AR process to each retained PC
    for pc in range(n_pcs):
        arn = ARN(monthly_pcs[:, pc].squeeze())
        res = arn.fit()
        arp = res.k_ar
        # the prediction must supply n_mnths + 12 values to fill the column
        predicted_pcs[:, pc] = M * arn.predict(
            res.params, noise='all', dynamic=True,
            start=arp, end=n_mnths + arp + 11)

    # reconstruct the field from the predicted PCs and return it
    return reconstruct_field(predicted_pcs, monthly_eofs[:n_pcs], n_pcs, wgts)
def eof_computation(var, varunits, lat, lon):
    """Run a latitude-weighted EOF analysis and return its main products.

    Args:
        var: Data array handed to ``Eof``; the original comment describes it
            as dimensioned (ntime, nlat, nlon).
        varunits: Not used by this function.
        lat: Latitudes in degrees, used to build sqrt(cos(lat)) weights.
        lon: Not used by this function.

    Returns:
        Tuple ``(solver, pcs_scal1, eofs_scal2, pcs_unscal0, eofs_unscal0,
        varfrac)``: the solver, PCs with ``pcscaling=1``, EOFs with
        ``eofscaling=2``, unscaled PCs, unscaled EOFs and all variance
        fractions.
    """
    print(
        '____________________________________________________________________________________________________________________'
    )
    print('Computing the EOFs and PCs')

    # An extra length-1 axis makes the latitude weights broadcastable against
    # the data. The cosine is clipped to [0, 1] because rounding (e.g. float32
    # latitudes at the poles) can make it slightly negative, which would turn
    # the square root into NaN.
    coslat = np.cos(np.deg2rad(lat)).clip(0., 1.)
    weights_array = np.sqrt(coslat)[:, np.newaxis]

    start = datetime.datetime.now()
    solver = Eof(var, weights=weights_array)
    elapsed = (datetime.datetime.now() - start).total_seconds()
    # report a number of seconds, as the message says, not a timedelta
    print('EOF computation took me %s seconds' % elapsed)

    # all variance fractions
    varfrac = solver.varianceFraction()
    # unscaled PCs and EOFs (scaling option 0)
    pcs_unscal0 = solver.pcs()
    eofs_unscal0 = solver.eofs()
    # PCs with scaling option 1, EOFs with scaling option 2
    pcs_scal1 = solver.pcs(pcscaling=1)
    eofs_scal2 = solver.eofs(eofscaling=2)

    return solver, pcs_scal1, eofs_scal2, pcs_unscal0, eofs_unscal0, varfrac
def eofs_as(dat):
    """Compute the ten leading EOFs/PCs of the land-masked 'curl' climatology.

    The land mask from ``get_coasts`` is applied at every time step, an
    effective-degrees-of-freedom value is computed per unmasked grid point
    with ``get_eddof``, and the integer of their NaN-mean is passed to ``Eof``
    as ``ddof``.

    Side effect: rebinds the module-level name ``land``.

    Returns:
        ``(eof, pc, varfrac, x, y, edof)`` where ``x, y`` is the lon/lat
        meshgrid and ``edof`` holds NaN at masked points.
    """
    global land
    field = climatologia_xarray(dat['curl']).values
    EC, WC, land = get_coasts(dat.lat, dat.lon)

    # replicate the 2-D land mask along the leading (time) axis
    msk = np.empty(np.shape(field))
    msk[...] = land
    masked_field = np.ma.array(field, mask=msk)

    from get_eddof import get_eddof

    n_lat, n_lon = len(dat.lat), len(dat.lon)
    edof = np.empty([n_lat, n_lon])
    for row, col in np.ndindex(n_lat, n_lon):
        if not msk[0, row, col]:
            edof[row, col] = get_eddof(masked_field[:, row, col])
        else:
            edof[row, col] = np.nan
    dof = int(np.nanmean(edof))

    # sqrt(cos(lat)) weights, broadcastable along longitude
    cos_lat = np.cos(np.deg2rad(dat.lat.values)).clip(0., 1.)
    wgts = np.sqrt(cos_lat)[..., np.newaxis]

    solver = Eof(masked_field, center=True, weights=wgts, ddof=dof)
    eof = solver.eofs(neofs=10, eofscaling=2)
    pc = solver.pcs(npcs=10, pcscaling=1)
    varfrac = solver.varianceFraction()
    eigvals = solver.eigenvalues()  # computed but not returned

    x, y = np.meshgrid(dat.lon, dat.lat)
    return eof, pc, varfrac, x, y, edof
def compute_ipo(sst_anoms, years_pass=11, N=2.0):
    """Low-pass filter SST anomalies and compute their five leading EOFs/PCs.

    A Butterworth low-pass filter is applied along the first axis of
    ``sst_anoms['sst_masked']``; the result is stored in the dataset as
    ``'sst_filtered'`` (the input dataset is modified). EOFs are computed from
    the filtered field, and the unfiltered field is projected onto them.

    Args:
        sst_anoms: Dataset-like object with ``'sst_masked'``, ``'lat'``,
            ``'lon'`` and ``'time'`` entries.
        years_pass: Filter period in years (multiplied by 12 to get samples).
        N: Order handed to ``signal.butter``.

    Returns:
        ``(eofs, PCs, lons, lats, PCs_monthly)``: EOFs as correlation maps,
        a DataFrame of the PCs of the filtered field, the lon/lat meshgrid,
        and a DataFrame of the projection of the unfiltered field.
    """
    # np.int was removed from NumPy; the builtin int is the exact equivalent
    high = int(years_pass * 12.)
    # NOTE(review): the cutoff N / high equals 2 / high (a period of `high`
    # samples relative to Nyquist) only for the default N=2 - confirm before
    # calling with a different order.
    B, A = signal.butter(N, N / high, btype='lowpass', output='ba')

    def filter_SST(x):
        # series containing NaN are returned untouched
        if np.isnan(x).any():
            return x
        return signal.filtfilt(B, A, x)

    sst_anoms['sst_filtered'] = (
        ('time', 'lat', 'lon'),
        np.apply_along_axis(filter_SST, 0, sst_anoms['sst_masked'].data))

    lat = sst_anoms['lat'].values
    lon = sst_anoms['lon'].values
    lons, lats = np.meshgrid(lon, lat)

    # clip guards against tiny negative cosines at the poles (sqrt -> NaN)
    coslat = np.cos(np.deg2rad(lat)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]

    sst_anoms.load()
    X = sst_anoms['sst_filtered'].data
    solver = Eof(X, weights=wgts)

    eofs = solver.eofsAsCorrelation(neofs=5)
    pcs = solver.pcs(npcs=5, pcscaling=1)
    PCs = pd.DataFrame(pcs, index=sst_anoms['time'].to_index())

    # project the unfiltered anomalies onto the EOFs of the filtered field
    PCs_monthly = solver.projectField(sst_anoms['sst_masked'].data, 5)
    PCs_monthly = pd.DataFrame(PCs_monthly, index=sst_anoms['time'].to_index())

    return eofs, PCs, lons, lats, PCs_monthly
def calc_HadISST_residual_EOFs(histo_sy, histo_ey, run_n):
    """Compute all EOFs and PCs of the HadISST residuals and save them.

    The residuals file named by ``get_HadISST_residuals_fname`` is read, its
    fill value is masked, an uncentred sqrt(cos(lat))-weighted EOF analysis is
    run, and the EOFs and PCs are written with ``save_3d_file`` and
    ``save_pcs``.

    Args:
        histo_sy, histo_ey, run_n: Forwarded to the file-name helpers.
    """
    # load the already calculated residuals
    resid_fname = get_HadISST_residuals_fname(histo_sy, histo_ey, run_n)
    fh = netcdf_file(resid_fname, 'r')
    # the file must stay open until saving is done (the save helpers receive
    # the latitude/longitude variables), but must be closed even on failure
    try:
        lats_var = fh.variables["latitude"]
        lons_var = fh.variables["longitude"]
        var = fh.variables["sst"]
        attrs = var._attributes
        mv = attrs["_FillValue"]
        sst_data = numpy.ma.masked_equal(var[:], mv)

        # calculate the EOFs and PCs
        coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
        wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
        eof_solver = Eof(sst_data, center=False, weights=wgts)
        pcs = eof_solver.pcs(npcs=None)
        eofs = eof_solver.eofs(neofs=None)

        # get the output names
        out_eofs_fname = get_HadISST_residual_EOFs_fname(histo_sy, histo_ey, run_n)
        out_pcs_fname = get_HadISST_residual_PCs_fname(histo_sy, histo_ey, run_n)

        # save the eofs and pcs
        save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
        save_pcs(out_pcs_fname, pcs, attrs)
    finally:
        fh.close()
def calculate_EAsia_rm_eofs(data, lats, lons, lat_min=20, lat_max=50, lon_min=110, lon_max=180):
    """Compute EOFs over a lat/lon box and regress the first three PCs on data.

    The box defaults to 20-50 latitude and 110-180 longitude. The EOF analysis
    uses sqrt(cos(lat)) weights on the selected region; the regressions are
    done against the full, unsubsetted ``data``.

    Returns:
        ``(var_frac, reg_pc1, pval_pc1, reg_pc2, pval_pc2, reg_pc3,
        pval_pc3)``.
    """
    in_lat = (lats >= lat_min) & (lats <= lat_max)
    in_lon = (lons >= lon_min) & (lons <= lon_max)
    regional = data[:, in_lat, :][:, :, in_lon]

    # sqrt(cos(lat)) weights, broadcastable along longitude
    weights = np.sqrt(np.cos(np.deg2rad(lats[in_lat])))[..., np.newaxis]
    solver = Eof(regional, weights=weights)

    var_frac = solver.varianceFraction()
    pcs = solver.pcs(npcs=3, pcscaling=1)

    # regress each of the three leading PCs onto the original data
    results = [var_frac]
    for mode in range(3):
        reg_map, p_values = regress_map.regress_map(
            pcs[:, mode], data, map_type='regress')
        results.append(reg_map)
        results.append(p_values)
    return tuple(results)
def reconstruct_data(arr, neofs=16):
    """Yield reconstructions of ``arr`` from a varying number of EOFs.

    Args:
        arr: Data passed to ``Eof`` (solved once, without centring).
        neofs: A single mode count or an iterable of mode counts. Any
            non-iterable scalar is accepted, including NumPy integer scalars,
            which the previous exact ``type(...) == int`` test rejected.

    Yields:
        ``(reconstructed, pcs, eofs)`` for each requested mode count.
    """
    # EAFP: anything that cannot be iterated is treated as one mode count
    try:
        mode_counts = list(neofs)
    except TypeError:
        mode_counts = [neofs]

    solver = Eof(arr, center=False)
    for n in mode_counts:
        reconstructed = solver.reconstructedField(neofs=n)
        pcs = solver.pcs(npcs=n)
        eofs = solver.eofs(neofs=n)
        yield reconstructed, pcs, eofs
def create_monthly_intvar(run_type, ref_start, ref_end, n_pcs=22, run_n=400):
    """Build a synthetic monthly internal-variability field from modelled PCs.

    The HadISST monthly residuals are decomposed with ``Eof`` (no centring,
    sqrt(cos(lat)) weights), an ``ARN`` model is fitted to each of the leading
    ``n_pcs`` principal components, and the predicted PC series are handed to
    ``reconstruct_field`` together with the EOFs and the weights.

    Args:
        run_type, ref_start, ref_end: Not used by this function; kept so that
            existing callers keep working.
        n_pcs: Number of leading PCs/EOFs to model and reconstruct with.
        run_n: Forwarded to ``get_HadISST_monthly_residuals_fname``.

    Returns:
        The result of ``reconstruct_field`` for the predicted PCs.
    """
    # period used to locate the residuals file
    histo_sy = 1899
    histo_ey = 2010
    monthly_residuals_fname = get_HadISST_monthly_residuals_fname(
        histo_sy, histo_ey, run_n)

    # read the residuals (fill value masked) and always release the file
    fh = netcdf_file(monthly_residuals_fname, 'r')
    try:
        var = fh.variables["sst"]
        mv = var._attributes["_FillValue"]
        # masked_equal copies by default, so the data outlive the file handle
        monthly_residuals = numpy.ma.masked_equal(var[:], mv)
    finally:
        fh.close()

    # weights for reconstruction / projection: sqrt(cos(lat)) for latitudes
    # 89.5 .. -89.5 in 1 degree steps. The clip belongs on the cosine; the
    # previous code clipped the angle in radians, which distorted the weights
    # for every negative latitude and for latitudes above about 57 degrees.
    lat_centres = numpy.arange(89.5, -90.5, -1)
    coslat = numpy.cos(numpy.deg2rad(lat_centres)).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]

    eof_solver = Eof(monthly_residuals, center=False, weights=wgts)
    monthly_pcs = eof_solver.pcs(npcs=n_pcs)
    monthly_eofs = eof_solver.eofs(neofs=n_pcs)

    # get the explanation of variance and calculate the scalar from it
    M = 1.0 / numpy.sum(eof_solver.varianceFraction(neigs=n_pcs))

    # get the number of months to predict the PCs for and create the storage
    histo_sy, histo_ey, rcp_sy, rcp_ey = get_start_end_periods()
    n_mnths = 12 * (rcp_ey - histo_sy)
    predicted_pcs = numpy.zeros([n_mnths + 12, n_pcs], "f")

    # fit an AR process to each retained PC
    for pc in range(n_pcs):
        arn = ARN(monthly_pcs[:, pc].squeeze())
        res = arn.fit()
        arp = res.k_ar
        # the prediction must supply n_mnths + 12 values to fill the column
        predicted_pcs[:, pc] = M * arn.predict(
            res.params, noise='all', dynamic=True,
            start=arp, end=n_mnths + arp + 11)

    # reconstruct the field and return
    monthly_intvar = reconstruct_field(predicted_pcs, monthly_eofs[:n_pcs],
                                       n_pcs, wgts)
    return monthly_intvar
def eof(in_bands):
    """Return the leading EOF, its PC and its variance fraction for a band stack.

    Args:
        in_bands: Sequence of objects exposing ``.data`` and ``.copy()``; the
            ``.data`` arrays are stacked along a new leading axis, which is
            the axis the EOF analysis is taken over.

    Returns:
        ``(cube, pc1, var_frac)``: a copy of the first band whose data is
        replaced by EOF 1, the first PC (``pcscaling=1``) and the variance
        fraction of the first mode.
    """
    stacked = np.array([band.data for band in in_bands])

    # take eof over the leading (stacking) dimension
    solver = Eof(stacked)
    leading_pattern = solver.eofs(neofs=1)[0, :]
    pc1 = solver.pcs(pcscaling=1, npcs=1)[:, 0]
    var_frac = solver.varianceFraction(neigs=1)[0]

    cube = in_bands[0].copy()
    cube.data = leading_pattern
    return cube, pc1, var_frac
def get_EOF(data, order=1, mode='corr'):
    '''
    Run an EOF analysis on ``data`` and return one of its products.

    :param data: image data, sst or t300, [month, lon, lat]
    :param order: int, number of leading modes to return
    :param mode: 'corr' for EOFs as correlation maps, 'cova' for EOFs as
        covariance maps, 'pc' for the principal components (pcscaling=1)
    :return: the single product selected by ``mode``
    :raises ValueError: if ``mode`` is not one of 'corr', 'cova', 'pc'
    '''
    # validate before solving so a typo fails fast with a clear message
    # instead of an UnboundLocalError after the decomposition has run
    if mode not in ('corr', 'cova', 'pc'):
        raise ValueError(
            "mode must be 'corr', 'cova' or 'pc', got %r" % (mode,))

    solver = Eof(data)
    if mode == 'corr':
        return solver.eofsAsCorrelation(neofs=order)
    if mode == 'cova':
        return solver.eofsAsCovariance(neofs=order)
    return solver.pcs(npcs=order, pcscaling=1)
def E_Cindex(SSTA, lat, lon):
    """
    E and C indices used to define EP and CP events.

    The two leading PCs of SSTA over the box lat -30..30, lon 120..280 are
    sign-fixed and combined into two orthogonal axes rotated 45 degrees
    relative to the PCs.

    SSTA is indexed as (time, lat, lon). The first 29 time steps are left out
    of the EOF analysis, so the returned series are 29 entries shorter than
    the time axis.

    Returns (C_index, E_index).
    """
    lat_sel = (lat <= 30) & (lat >= -30)
    lon_sel = (lon <= 280) & (lon >= 120)
    SSTA_TP = SSTA[:, lat_sel, :][:, :, lon_sel]
    lat1 = lat[lat_sel]
    lon1 = lon[lon_sel]

    # EOF analysis (unweighted) to get the first 2 PCs
    solver = Eof(SSTA_TP[29:, :, :])
    eof = solver.eofsAsCorrelation(neofs=2)
    pcs = solver.pcs(npcs=2, pcscaling=1)

    # sign convention from the mean loading in lat -5..5, lon 190..240
    near_equator = (lat1 <= 5) & (lat1 >= -5)
    ref_lons = (lon1 <= 240) & (lon1 >= 190)
    loading1 = np.mean(eof[0, near_equator, :][:, ref_lons], (0, 1))
    loading2 = np.mean(eof[1, near_equator, :][:, ref_lons], (0, 1))

    pc1 = pcs[:, 0]
    pc2 = pcs[:, 1]
    if loading1 < 0:
        pc1 = -pc1
    if loading2 > 0:
        pc2 = -pc2

    # do the 45 degree rotation
    root2 = np.sqrt(2)
    C_index = (pc1 + pc2) / root2
    E_index = (pc1 - pc2) / root2
    return C_index, E_index
def calculate_U_EOF(U, SST, THF, lats_ua, lons_ua, lats_SST, lons_SST, lats_THF, lons_THF, lat_min=lat_min, lat_max=lat_max, lon_min=lon_min, lon_max=lon_max, npcs=3):
    """Compute the leading PCs of U over a region and map them onto U, SST, THF.

    The EOF analysis (sqrt(cos(lat)) weights) runs on ``U`` restricted to the
    lat/lon box; each PC is then passed to ``regress_map`` with
    ``map_type='corr'`` against the full ``U``, ``SST`` and ``THF`` fields.

    Returns:
        ``(pcs, regress_U, regress_SST, regress_THF, variance_fraction[:npcs],
        U_climatology)`` where ``U_climatology`` is the mean of ``U`` over its
        first axis.
    """
    # select region
    in_lat = (lats_ua >= lat_min) & (lats_ua <= lat_max)
    in_lon = (lons_ua >= lon_min) & (lons_ua <= lon_max)
    U_region = U[:, in_lat, :][:, :, in_lon]
    U_climatology = np.mean(U, axis=0)

    # calculate EOFs
    wgts = np.sqrt(np.cos(np.deg2rad(lats_ua[in_lat])))[..., np.newaxis]
    solver = Eof(U_region, weights=wgts)
    pcs = solver.pcs(npcs=npcs, pcscaling=1)
    variance_fraction = solver.varianceFraction()

    # one map per PC for each target field
    regress_U = np.zeros([npcs, lats_ua.shape[0], lons_ua.shape[0]])
    regress_SST = np.zeros([npcs, lats_SST.shape[0], lons_SST.shape[0]])
    regress_THF = np.zeros([npcs, lats_THF.shape[0], lons_THF.shape[0]])
    targets = ((regress_U, U), (regress_SST, SST), (regress_THF, THF))
    for k in range(npcs):
        series = pcs[:, k]
        for out_maps, field in targets:
            out_maps[k, :, :] = regress_map(series, field, map_type='corr')[0]

    return (pcs, regress_U, regress_SST, regress_THF,
            variance_fraction[:npcs], U_climatology)
def calculate_IOBM(data, lats, lons, times, t_units, calendar):
    """ Calculate the Indian Ocean basin mode as the first EOF over the region
    20S-20N, 40E-110E. See Yang et al (2007) doi:10.1029/2006GL028571

    Values with magnitude above 1e3 are treated as missing. The annual cycle
    is removed with ``remove_annual_cycle`` and the first PC of the
    sqrt(cos(lat))-weighted regional field is returned, sign-flipped if the
    mean of EOF 1 is negative and standardised to zero mean and unit standard
    deviation. The caller's ``data`` array is left unmodified.
    """
    # work on a copy: masking in place would overwrite the caller's array
    data = data.copy()
    data[np.abs(data) > 1e3] = np.nan  # set unreasonably high values to NaN
    annual_cycle_removed = remove_annual_cycle(data, times, t_units, calendar)

    lat_min, lat_max = -20, 20
    lon_min, lon_max = 40, 110
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    IO_SST = annual_cycle_removed[:, lat_mask, :][:, :, lon_mask]

    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(IO_SST, weights=wgts)

    IOBM = solver.pcs(npcs=1, pcscaling=1).flatten()
    EOF1 = solver.eofs(neofs=1)[0, :, :]
    # fix the arbitrary EOF sign so the pattern mean is positive
    if np.nanmean(EOF1) < 0:
        IOBM = -IOBM
    IOBM = (IOBM - np.mean(IOBM)) / np.std(IOBM)
    return IOBM
def calculate_IPO(data, lats, lons, times, t_units, calendar):
    """ Calculate the Inter-decadal Pacific Oscillation index
    Calculated as the first EOF of SST 60S to 60N over the Pacific

    The longitude range used is 120..270. Values with magnitude above 1e3 are
    treated as missing. The annual cycle is removed with
    ``remove_annual_cycle`` and the first PC of the sqrt(cos(lat))-weighted
    regional field is returned, sign-flipped if the mean of EOF 1 is negative
    and standardised to zero mean and unit standard deviation. The caller's
    ``data`` array is left unmodified.
    """
    # work on a copy: masking in place would overwrite the caller's array
    data = data.copy()
    data[np.abs(data) > 1e3] = np.nan  # set unreasonably high values to NaN
    annual_cycle_removed = remove_annual_cycle(data, times, t_units, calendar)

    lat_min, lat_max = -60, 60
    lon_min, lon_max = 120, 270
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    Pacific_SST = annual_cycle_removed[:, lat_mask, :][:, :, lon_mask]

    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(Pacific_SST, weights=wgts)

    IPO = solver.pcs(npcs=1, pcscaling=1).flatten()
    EOF1 = solver.eofs(neofs=1)[0, :, :]
    # fix the arbitrary EOF sign so the pattern mean is positive
    if np.nanmean(EOF1) < 0:
        IPO = -IPO
    IPO = (IPO - np.mean(IPO)) / np.std(IPO)
    return IPO
def calculate_PDO(data, lats, lons, times, t_units, calendar):
    """ Calculate the Pacific Decadal Oscillation index as the first PC of SST
    between 20N and 70N See Newman et al (2016) doi:10.1175/JCLI-D-15-0508.1

    The longitude range used is 120..270. Values with magnitude above 1e3 are
    treated as missing, the global mean (from ``global_mean``) is subtracted
    at every time step and the annual cycle is removed. The first PC of the
    sqrt(cos(lat))-weighted regional field is returned, sign-flipped if the
    mean of EOF 1 east of longitude 210 is negative and standardised to zero
    mean and unit standard deviation. The caller's ``data`` array is left
    unmodified.
    """
    # work on a copy: masking in place would overwrite the caller's array
    data = data.copy()
    data[np.abs(data) > 1e3] = np.nan  # set unreasonably high values to NaN

    global_mean_removed = data - global_mean(data, lats).reshape(
        times.shape[0], 1, 1)
    annual_cycle_removed = remove_annual_cycle(global_mean_removed, times,
                                               t_units, calendar)

    lat_min, lat_max = 20, 70
    lon_min, lon_max = 120, 270
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    N_Pacific_SST = annual_cycle_removed[:, lat_mask, :][:, :, lon_mask]

    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(N_Pacific_SST, weights=wgts)

    EOF1 = solver.eofs(neofs=1)[0, :, :]
    PDO = solver.pcs(npcs=1, pcscaling=1).flatten()
    # fix the arbitrary EOF sign using the part of the pattern east of 210
    if np.nanmean(EOF1[:, lons[lon_mask] > 210]) < 0:
        PDO = -PDO
    PDO = (PDO - np.mean(PDO)) / np.std(PDO)
    return PDO
def calc_HadISST_monthly_residual_EOFs(histo_sy, histo_ey, ref_start, ref_end, run_n, n_eofs=22):
    """Compute and save EOFs, PCs and eigenvalues of the monthly residuals.

    The monthly residuals file is read, values below -1000 are masked, a
    centred sqrt(cos(lat))-weighted EOF analysis is run and the leading
    ``n_eofs`` EOFs, PCs and eigenvalues are written with ``save_3d_file``,
    ``save_pcs`` and ``save_eigenvalues``.

    Args:
        histo_sy, histo_ey, run_n: Forwarded to the file-name helpers.
        ref_start, ref_end: Not used by this function.
        n_eofs: Number of leading modes to keep.
    """
    # load the already calculated residuals
    # note that we don't have to subtract the annual cycle any more as the
    # residuals are with respect to a smoothed version of the monthly ssts
    resid_fname = get_HadISST_monthly_residuals_fname(histo_sy, histo_ey, run_n)
    resid_mon_fh = netcdf_file(resid_fname, 'r')
    # the file must stay open until saving is done (the save helpers receive
    # the latitude/longitude variables), but must be closed even on failure
    try:
        sst_var = resid_mon_fh.variables["sst"]
        lats_var = resid_mon_fh.variables["latitude"]
        lons_var = resid_mon_fh.variables["longitude"]
        attrs = sst_var._attributes
        ssts = numpy.array(sst_var[:])
        sst_resids = numpy.ma.masked_less(ssts, -1000)

        # calculate the EOFs and PCs
        coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
        wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
        eof_solver = Eof(sst_resids, center=True, weights=wgts)
        pcs = eof_solver.pcs(npcs=n_eofs)
        eofs = eof_solver.eofs(neofs=n_eofs)
        varfrac = eof_solver.varianceFraction(neigs=n_eofs)
        evs = eof_solver.eigenvalues(neigs=n_eofs)
        evs = evs.reshape([1, evs.shape[0]])
        # single-argument call form prints identically on Python 2 and 3
        print(evs.shape)

        # get the output names
        out_eofs_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
        out_pcs_fname = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
        out_evs_fname = get_HadISST_monthly_residual_EVs_fname(histo_sy, histo_ey, run_n)

        # save the eofs, pcs and eigenvalues; PCs get a length-1 middle axis
        save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
        out_pcs = pcs.reshape([pcs.shape[0], 1, pcs.shape[1]])
        save_pcs(out_pcs_fname, out_pcs, attrs)
        save_eigenvalues(out_evs_fname, evs, attrs)
    finally:
        resid_mon_fh.close()
def PCA(self, field_name):
    """Run a PCA on a stored field and save its products as ``.npy`` files.

    The array ``<directory_data>/<field_name>_data_<start>_to_<end>.npy`` is
    loaded, shifted by its NaN-aware mean along axis 0 and divided by its
    overall NaN-aware standard deviation (a single scalar), then decomposed
    with ``Eof``. PCs, EOFs, cumulative variance fraction, mean and standard
    deviation are saved next to the input.
    """
    observationPeriod = 'data_' + str(self.start_pca) + '_to_' + str(self.end_pca)
    folder = self.directory_data + '/'
    tag = field_name + '_' + observationPeriod

    modelData = np.load(folder + tag + '.npy')

    # Velocity is a 3D vector and needs to be reshaped before the PCA
    if 'Velocity' in field_name:
        flat_shape = (modelData.shape[0],
                      modelData.shape[1] * modelData.shape[2])
        modelData = np.reshape(modelData, flat_shape, order='F')

    # centre on the per-column mean, scale by the global standard deviation
    meanData = np.nanmean(modelData, 0)
    stdData = np.nanstd(modelData)
    solver = Eof((modelData - meanData) / stdData)

    outputs = (
        ('pcs_', solver.pcs()),                                # PC time-series
        ('eofs_', solver.eofs()),                              # projection
        ('varCumulative_', np.cumsum(solver.varianceFraction())),
        ('mean_', meanData),
        ('std_', stdData),
    )
    for prefix, values in outputs:
        np.save(folder + prefix + tag, values)
# Read the SST field and its coordinates from the already-open dataset, then
# release the file.
sst = ncin.variables['sst'][:]
lons = ncin.variables['longitude'][:]
lats = ncin.variables['latitude'][:]
ncin.close()

# Create an EOF solver to do the EOF analysis. Square-root of cosine of
# latitude weights are applied before the computation of EOFs.
coslat = np.cos(np.deg2rad(lats))
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(sst, weights=wgts)

# Retrieve the leading EOF, expressed as the correlation between the leading
# PC time series and the input SST anomalies at each grid point, and the
# leading PC time series itself.
eof1 = solver.eofsAsCorrelation(neofs=1)
pc1 = solver.pcs(npcs=1, pcscaling=1)

# Plot the leading EOF expressed as correlation in the Pacific domain.
clevs = np.linspace(-1, 1, 11)
ax = plt.axes(projection=ccrs.PlateCarree(central_longitude=190))
fill = ax.contourf(lons, lats, eof1.squeeze(), clevs,
                   transform=ccrs.PlateCarree(), cmap=plt.cm.RdBu_r)
ax.add_feature(cfeature.LAND, facecolor='w', edgecolor='k')
cb = plt.colorbar(fill, orientation='horizontal')
cb.set_label('correlation coefficient', fontsize=12)
plt.title('EOF1 expressed as correlation', fontsize=16)

# Plot the leading PC time series.
# NOTE(review): the x axis is hard-coded to 1962..2011 (50 values); this
# presumably matches the length of the time axis - confirm against the data.
plt.figure()
years = range(1962, 2012)
plt.plot(years, pc1, color='b', linewidth=2)
def main():
    """Plot the leading PC and first three EOFs of yearly snowfall per dataset.

    For every labelled directory, all ``*.nc`` files are read (first record
    of the ``snow_fall`` variable, keyed by the first ``year`` value), an EOF
    analysis is run over the years, the PCs are written to
    ``<vname>_<label>_pc.csv`` in the working directory, and PC 1 is added to
    a shared line plot. The PC plot and one figure per EOF (three in total)
    are saved into the "Obs" directory.
    """
    label_to_hles_dir = OrderedDict([
        ("Obs", Path(
            "/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_Obs_1980-2009"
        )),
        ("CRCM5_NEMO", Path(
            "/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_NEMO_1980-2009"
        )),
        ("CRCM5_HL", Path(
            "/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_Hostetler_1980-2009"
        )),
        # ("CRCM5_NEMO_TT_PR", Path("/RESCUE/skynet3_rech1/huziy/Netbeans Projects/Python/RPN/lake_effect_analysis_CRCM5_NEMO_based_on_TT_PR_1980-2009"))
    ])

    label_to_line_style = {
        "Obs": "k.-",
        "CRCM5_NEMO": "r",
        "CRCM5_HL": "b",
        "CRCM5_NEMO_TT_PR": "g"
    }

    vname = "snow_fall"
    units = "cm"
    #vname = "lkeff_snowfall_days"
    #units = "days"
    npc = 1

    b = Basemap(lon_0=180,
                llcrnrlon=common_params.great_lakes_limits.lon_min,
                llcrnrlat=common_params.great_lakes_limits.lat_min,
                urcrnrlon=common_params.great_lakes_limits.lon_max,
                urcrnrlat=common_params.great_lakes_limits.lat_max,
                resolution="i")

    label_to_pc = {}
    label_to_eof = OrderedDict()
    label_to_varfraction = OrderedDict()

    # `mask` doubles as the "coordinates already read" flag
    mask = None

    plot_utils.apply_plot_params(font_size=12)
    fig = plt.figure()

    years = None
    lats = None
    lons = None

    for label, folder in label_to_hles_dir.items():
        y_to_snfl = {}

        for the_file in folder.iterdir():
            if not the_file.name.endswith(".nc"):
                continue

            with Dataset(str(the_file)) as ds:
                print(ds)
                snfl = ds.variables[vname][:]
                year_current = ds.variables["year"][:]

                # coordinates are taken from the first file only
                if mask is None:
                    lons, lats = [ds.variables[k][:] for k in ["lon", "lat"]]
                    lons[lons > 180] -= 360
                    mask = maskoceans(lons, lats, lons, inlands=True,
                                      resolution="i")

                y_to_snfl[year_current[0]] = snfl[0]

        years_ord = sorted(y_to_snfl)
        if years is None:
            years = years_ord

        data = np.ma.array([y_to_snfl[y] for y in years_ord])

        solver = Eof(data)
        eof = solver.eofsAsCorrelation()
        # eof = solver.eofs(neofs=4)
        pc = solver.pcs(pcscaling=0)

        label_to_varfraction[label] = solver.varianceFraction()
        label_to_pc[label] = pc
        label_to_eof[label] = eof

        # change the signs of pcs and eofs (done in place, so the CSV written
        # below contains the flipped PCs)
        if label not in ["CRCM5_HL"]:
            label_to_pc[label][:, 0] *= -1
            label_to_eof[label][0, :, :] *= -1

        if label in ["CRCM5_NEMO"]:
            label_to_pc[label][:, 1:] *= -1
            label_to_eof[label][1:, :, :] *= -1

        # save data for Diro
        print(pc.shape)
        df = pd.DataFrame(data=pc, index=years_ord)
        df.to_csv("{}_{}_pc.csv".format(vname, label))

        plt.plot(years_ord, label_to_pc[label][:, 0].copy(),
                 label_to_line_style[label], linewidth=2, label=label)

    plt.legend(loc="upper left")
    plt.ylabel(units)
    plt.xlabel("Year")
    plt.xticks(years)
    plt.grid()
    plt.gcf().autofmt_xdate()
    plt.savefig(str(label_to_hles_dir["Obs"].joinpath("pc{}_{}.png".format(
        npc, vname))), bbox_inches="tight")
    plt.close(fig)

    # plot the eofs
    plot_utils.apply_plot_params(font_size=12, width_cm=30, height_cm=6)

    lons[lons < 0] += 360
    xx, yy = b(lons, lats)

    for eof_ind in range(3):
        fig = plt.figure()
        gs = GridSpec(1, len(label_to_eof), wspace=0.02)

        for col, (label, eof_field) in enumerate(label_to_eof.items()):
            ax = fig.add_subplot(gs[0, col])
            to_plot = eof_field[eof_ind]
            im = b.pcolormesh(xx, yy, to_plot, cmap=cm.get_cmap("bwr", 10),
                              vmin=-0.25, vmax=0.25, ax=ax)

            # only the right-most panel shows its colorbar
            cb = b.colorbar(im, extend="both")
            cb.ax.set_visible(col == len(label_to_eof) - 1)

            # backslash escaped explicitly: "\s" is an invalid escape sequence
            ax.set_title("{} (explains {:.2f}$\\sigma^2$)".format(
                label, label_to_varfraction[label][eof_ind]))

            b.drawcoastlines(ax=ax)

        # fig.tight_layout()
        plt.savefig(str(label_to_hles_dir["Obs"].joinpath(
            "eof_raw_{}_{}.png".format(eof_ind + 1, vname))),
            bbox_inches="tight", dpi=300)
        plt.close(fig)
def EP_CPindex(SSTA, lat, lon):
    """
    EP_CPindex method to define CP and EP events.

    For CP: the regression of the field on the Nino1+2 SSTA (associated with
    eastern warming) is removed. For EP: the regression on the Nino4 SSTA
    (associated with central warming) is removed. The first PC of each
    residual field over the box lat -30..30, lon 120..280 is returned.

    SSTA is indexed as (time, lat, lon) and may contain masked values and NaN;
    ``lat`` is in degrees north and ``lon`` in degrees east (0..360).

    Returns (PC12, PC4): PC12 is for the CP definition and PC4 is for EP.
    """
    # Nino1+2: 90W-80W, 0-10S. The latitude band is south of the equator; the
    # previous mask (0 <= lat <= 10) selected 0-10N instead.
    ssta12 = SSTA[:, (lat <= 0) & (lat >= -10), :]
    Nino12 = np.ma.average(ssta12[:, :, (lon <= 280) & (lon >= 270)], (1, 2))
    Nino12 = np.ma.getdata(Nino12)

    # Nino4: 160E-150W, 5N-5S
    ssta4 = SSTA[:, (lat <= 5) & (lat >= -5), :]
    Nino4 = np.ma.average(ssta4[:, :, (lon <= 210) & (lon >= 160)], (1, 2))
    Nino4 = np.ma.getdata(Nino4)

    # tropical Pacific box used for the EOF analysis
    lat_sel = (lat <= 30) & (lat >= -30)
    lon_sel = (lon <= 280) & (lon >= 120)
    SSTA_TP = SSTA[:, lat_sel, :]
    SSTA_TP = SSTA_TP[:, :, lon_sel]
    lat1 = lat[lat_sel]
    lon1 = lon[lon_sel]

    # remove, grid point by grid point, the part linearly explained by each
    # Nino index (slope only, the intercept is kept in the residual)
    SSTA_TP12 = np.zeros(SSTA_TP.shape)
    SSTA_TP4 = np.zeros(SSTA_TP.shape)
    for i in range(0, SSTA_TP.shape[1]):
        for j in range(0, SSTA_TP.shape[2]):
            k12, _, _, _, _ = stats.linregress(Nino12, SSTA_TP[:, i, j])
            SSTA_TP12[:, i, j] = SSTA_TP[:, i, j] - k12 * Nino12
            k4, _, _, _, _ = stats.linregress(Nino4, SSTA_TP[:, i, j])
            SSTA_TP4[:, i, j] = SSTA_TP[:, i, j] - k4 * Nino4

    # reference box (lat -5..5, lon 190..240) used to fix the sign of each PC
    eq_sel = (lat1 <= 5) & (lat1 >= -5)
    ref_sel = (lon1 <= 240) & (lon1 >= 190)

    # EOF analysis (unweighted) of the field with Nino1+2 removed
    solver12 = Eof(SSTA_TP12)
    eof12 = solver12.eofsAsCorrelation(neofs=1)
    PC12 = solver12.pcs(npcs=1, pcscaling=1)
    PC12 = PC12[:, 0]
    a = eof12[:, eq_sel, :]
    if np.mean(a[:, :, ref_sel].squeeze(), (0, 1)) < 0:
        PC12 = -PC12

    # EOF analysis (unweighted) of the field with Nino4 removed
    solver4 = Eof(SSTA_TP4)
    eof4 = solver4.eofsAsCorrelation(neofs=1)
    PC4 = solver4.pcs(npcs=1, pcscaling=1)
    PC4 = PC4[:, 0]
    b = eof4[:, eq_sel, :]
    if np.mean(b[:, :, ref_sel].squeeze(), (0, 1)) < 0:
        PC4 = -PC4

    return PC12, PC4  #CP, EP
def calcSeasonalEOF(anomslp,years,year1,year2,monthind,eoftype,pctype):
    """
    Calculates an EOF analysis separately for every month slot of the record

    Parameters
    ----------
    anomslp : 4d array
        [year,month,lat,lon] sea level pressure anomalies
    years : 1d array
        years in total
    year1 : integer
        min year (use a non-finite value to keep the whole record)
    year2 : integer
        max year (use a non-finite value to keep the whole record)
    monthind : 1d array
        indices for months; not used by the current implementation
    eoftype : integer
        number of EOFs requested (1,2)
    pctype : integer
        number of PCs requested (1,2)

    Returns
    -------
    eof : array
        empirical orthogonal function of the LAST month slot only
    pc : array
        principal components, shape [year,2,month]
    """
    # single-argument print calls behave identically on Python 2 and 3
    print('\n>>> Using calcSeasonalEOF function!')

    ### Slice years
    if np.isfinite(year1) and np.isfinite(year2):
        # index with the 1d index array itself; indexing with the np.where
        # tuple and squeezing afterwards also collapsed any other length-1
        # axis (e.g. a single selected year or a single month)
        yearqq = np.where((years >= year1) & (years <= year2))[0]
        anomslp = anomslp[yearqq,:,:,:]
    else:
        print('Using entire time series for this EOF!')
    print('Sliced time period for seasonal mean!')

    ### Average over months
#    anomslp = anomslp[:,monthind,:,:]
#    anomslp = np.nanmean(anomslp[:,:,:,:],axis=1)
    print('Sliced month period for seasonal mean!')

    anomslpq = anomslp
    # NOTE(review): the second axis is fixed at 2 while pctype PCs are
    # requested; pctype=1 only fits through broadcasting - confirm intent
    pc = np.empty((anomslpq.shape[0],2,anomslpq.shape[1]))

    # Square-root of cosine of latitude weights are applied before the
    # computation of EOFs. `lats` is read from module scope; the weights do
    # not depend on the month, so they are built once.
    coslat = np.cos(np.deg2rad(lats)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]

    for i in range(anomslpq.shape[1]):
        ### Calculate EOF for month slot i
        solver = Eof(anomslpq[:,i,:,:], weights=wgts)

        # Retrieve the leading EOF, expressed as the covariance between the
        # leading PC time series and the input SLP anomalies at each grid
        # point.
        eof = solver.eofsAsCovariance(neofs=eoftype)
        pc[:,:,i] = solver.pcs(npcs=pctype, pcscaling=1)

    print('EOF and PC computed!')

    print('*Completed: EOF and PC Calculated!\n')
    return eof,pc
way = np.cos(cos) weightf = np.repeat(way[:, np.newaxis], len(lon_at), axis=1) # add weighting function (because of the latitude) atemp_era5 = signal.detrend( hgt500_era5, axis=0) # # linearly detrend 500 hPa geopotential height data atemp_era5_pre = np.zeros((nt * ny, nlat, nlon)) for iy in np.arange(ny): atemp_era5_pre[iy * nt:iy * nt + nt] = atemp_era5[iy] * weightf[None, :, :] ### we did not using n-day moving average as some other studies do partly because the original goal for us (Jiacheng & Zhuo) ### is to evaluate GEFS v12 reforecasts where the reforecast length is usually 16 days which made it impossible ### to apply n-day moving average or other time filtering methods. 4 EOFs may already filter some noisy signals ### To be consistent with other reseachers, one can add n-day moving code above. # EOF analysis solver = Eof(atemp_era5_pre, center=True) pcs = solver.pcs() mid_eig = solver.eigenvalues() mid_eofs = solver.eofs() eofs = solver.eofs() ### Print explained variance when using 4 EOFs #var_explained_era5= np.cumsum(mid_eig)/np.sum(np.sum(mid_eig)) #print(var_explained_era5[3]) #0.5316330300316366 reconstruction_era5 = solver.reconstructedField( noef) #Using 4 leading EOFs to reconstruct hgt500 field ### The Kmeans method needs a 2-D data format: number of days x horizontal fields atemp_era5_post = np.zeros((ny * nt, nlat * nlon)) for i in np.arange(ny * nt):
cs.set_clim(-1, 1)
cb = plt.colorbar(cs)

# second panel: covariance map of the second mode
plt.subplot(212)
cs = plt.imshow(covmaps[1], cmap=plt.cm.RdBu_r)
cs.set_clim(-1, 1)
cb = plt.colorbar(cs)
# -

# Then, we can recover the explained variance (expressed in percent):

eofvar = solver.varianceFraction(neigs=neofs) * 100
eofvar

# Finally, we can obtain the principal components. To obtain normalized time-series, the `pcscaling` argument must be equal to 1.

# transposed so that pcs[k] is the time series of mode k
pcs = solver.pcs(pcscaling=1, npcs=neofs).T

plt.figure()
plt.plot(pcs[0], label='pc1')
plt.plot(pcs[1], label='pc2')
leg = plt.legend()

# ## EOF computation (xarray mode)
#
# In order to have EOF as an `xarray` with all its features, the Eof method of the `eofs.xarray` submodule must be used.

from eofs.xarray import Eof

# Since it uses named labels, the `time_counter` dimension must first be renamed in `time`:

anoms = anoms.rename({'time_counter': 'time'})

solver = Eof(anoms, weights=weights)
z5_diffnao = z5_diffn[:, :, :, lonq] z5n_h = np.nanmean(z500_h[:, 91:, latq, :], axis=0) z5nao_h = z5n_h[:, :, lonq] ### Calculate NAO # Create an EOF solver to do the EOF analysis. Square-root of cosine of # latitude weights are applied before the computation of EOFs. coslat = np.cos(np.deg2rad(latnao)).clip(0., 1.) wgts = np.sqrt(coslat)[..., np.newaxis] solver = Eof(z5nao_h, weights=wgts) # Retrieve the leading EOF, expressed as the covariance between the leading PC # time series and the input SLP anomalies at each grid point. eof1 = solver.eofsAsCovariance(neofs=1).squeeze() pc1 = solver.pcs(npcs=1, pcscaling=1).squeeze() ### Calculate NAO index def NAOIndex(anomz5, eofpattern, members): """ Calculate NAO index by regressing Z500 onto the EOF1 pattern """ print('\n>>> Using NAO Index function!') if members == True: nao = np.empty((anomz5.shape[0], anomz5.shape[1])) for i in range(anomz5.shape[0]): print('Regressing ensemble ---> %s!' % (i + 1)) for j in range(anomz5.shape[1]): varx = np.ravel(anomz5[i, j, :, :])
from eofs.standard import Eof

# ---------------------------------------------------------------- NCEP
# Square root of cos(latitude), clipped to [0, 1], with a trailing length-1
# axis appended so the weights can broadcast against the data.
coslat = np.cos(np.deg2rad(curl_ncep_clim.lat.values))
coslat = coslat.clip(0., 1.)
wgts = np.sqrt(coslat)[..., None]
solver = Eof(curl_ncep_clim.values, weights=wgts)

# Bar chart of the percentage of variance carried by every mode.
var = solver.varianceFraction()
plt.bar(np.arange(len(var)), 100 * var)
plt.show()

# Retain the leading mode(s) only.
n = 1
eof_ncep = solver.eofs(eofscaling=2, neofs=n)
pc_ncep = solver.pcs(pcscaling=1, npcs=n)
vf_ncep = var[:n]

# ---------------------------------------------------------------- CFSR
# Same procedure as above, applied to the CFSR climatology.
coslat = np.cos(np.deg2rad(curl_cfsr_clim.lat.values))
coslat = coslat.clip(0., 1.)
wgts = np.sqrt(coslat)[..., None]
solver = Eof(curl_cfsr_clim.values, weights=wgts)

var = solver.varianceFraction()
plt.bar(np.arange(len(var)), 100 * var)
plt.show()

n = 1
eof_cfsr = solver.eofs(eofscaling=2, neofs=n)
pc_cfsr = solver.pcs(pcscaling=1, npcs=n)
# filenames=filenames[:200]
data = load_regional(filenames, ny, nx)
# Mask every entry equal to 2.e+20 (presumably the missing-data fill value --
# confirm against the input files).
data = np.ma.masked_values(data, 2.e+20)
# All progress messages use single-argument print(...) so that the text is
# identical under Python 2 and Python 3.
print("data loaded %s" % (data.shape,))

# Set up info
plt.set_cmap('RdBu')
neofs = 5
nens = data.shape[0]
nwanted = 57

solver = Eof(data)
print('set up EOF solver')
# PCs are requested with pcscaling=1; EOFs and variance fractions are limited
# to the same `neofs` leading modes.
pcs = solver.pcs(npcs=neofs, pcscaling=1)
eofs = solver.eofs(neofs=neofs)
varfrac = solver.varianceFraction(neigs=neofs)
print('calculated EOFs')

print('printing EOFs')
for i in range(neofs):
    print('EOF %s' % i)
    plt.clf()
    plot_region_pnw(eofs[i, :], lat_coord, lon_coord, 0, -1, 0, -1,
                    'EOF' + str(i), varfrac[i])

print("plotting histograms of PCs")
for i in range(3):
    plt.clf()
    # NOTE(review): `normed` was removed in Matplotlib 3.1; use `density=True`
    # once the project no longer needs to run on older Matplotlib releases.
    plt.hist(pcs[:, i], 200, range=(-4, 4), normed=1, alpha=0.4,
             label='pc' + str(i))
    plt.ylim([0, .6])
    plt.savefig(output_dir + '/histogram_pc' + str(i) + '.png')

print("plotting mean and stdev of ensemble")
dim='time', skipna=True) mes = datetime.datetime.strptime(lmonth[i], '%b').month hgt_erai_smean = hgt_erai_seas_mean.sel( time=np.logical_and(hgt_erai_seas_mean['time.month'] == mes, hgt_erai_seas_mean['time.year'] != 2002)) hgt_s4_smean = np.nanmean(hgt_s4.z.values[i:i + 3, :, :, :], axis=0) #eof analysis obs # Compute anomalies by removing the time-mean z_mean = np.nanmean(hgt_erai_smean.values, axis=0) z_anom = hgt_erai_smean.values - z_mean # Create an EOF solver to do the EOF analysis. Square-root of cosine of # latitude weights are applied before the computation of EOFs. solver = Eof(z_anom) #, weights=wgts) eofs = solver.eofsAsCorrelation(neofs=5) exp_var = solver.varianceFraction() pcs = solver.pcs(npcs=5, pcscaling=1) pc_erai[i, :, :] = pcs[:, 0:3] title = 'Observed HGT 200hPa EOFs - ' + season[i] filename = FIG_PATH + 'obs_eof_' + season[i] + '.png' PlotEOF(eofs[0:3, :, :], lat_erai, lon_erai, title, filename) filename = FIG_PATH + 'obs_scree_' + season[i] + '.png' ttle = 'Variance Explained by Observed modes - ' + season[i] PlotScree(exp_var, 36, title, filename) #eof analysis model mean # Compute anomalies by removing the time-mean z_mean = np.nanmean(hgt_s4_smean, axis=0) #computo media del ensamble hgt_s4m_smean = np.mean(np.reshape(hgt_s4_smean, [36, 51, 99, 512]), axis=1) z_anom = hgt_s4m_smean - z_mean solver_s4 = Eof(z_anom) #, weights=wgts)
# Bar charts of the eigenvalues / variance fractions. The x positions are
# hard-coded to six bars, so each solver is expected to return six values.
fig = plt.figure()
plt.bar(np.arange(6), solver_anom.eigenvalues())
fig.savefig(cartou + 'eigenvalues_temps_anom.pdf')

fig = plt.figure()
plt.bar(np.arange(6), solver.varianceFraction())
fig.savefig(cartou + 'varfrac_temps.pdf')

fig = plt.figure()
plt.bar(np.arange(6), solver_anom.varianceFraction())
fig.savefig(cartou + 'varfrac_temps_anom.pdf')

# Residual left after rebuilding each profile from the mean plus the first
# mode only. The leading EOF is fetched once, outside the loop, because it
# does not change between iterations.
fig = plt.figure()
atm_mean = np.mean(temps, axis=0)
first_eof = solver.eofs()[0]
for i, pc in enumerate(solver.pcs()[:, 0]):
    plt.plot(atm_mean + pc * first_eof - temps[i, :], alts)
fig.savefig(cartou + 'residual_temps_firstpc.pdf')

# Same residual for the anomaly data set, rebuilt around `atm_anom_mean`.
fig = plt.figure()
first_eof_anom = solver_anom.eofs()[0]
for i, pc in enumerate(solver_anom.pcs()[:, 0]):
    plt.plot(atm_anom_mean + pc * first_eof_anom - temps_anom[i, :], alts)
fig.savefig(cartou + 'residual_temps_anom_firstpc.pdf')

# plt.figure()
# for i, pc in enumerate(solver.pcs()[:,:2]):
#     plt.plot(atm_mean+pc[0]*solver.eofs()[0]+pc[1]*solver.eofs()[1]-temps[i,:], alts)

# ok so, if keeping only first and second eof I'm able to explain quite a fraction of the variability
#timearray.append(dt.fromtimestamp(i))
# Echo every time stamp so the values can be checked by eye.
for i in timearray:
    print(i)

# Grid file that supplies the rho-point coordinates used for the map below.
gridfile = '/users/asmit101/data/stuff/myngbay_grd.nc'
ncgrid = NetCDFFile(gridfile)
lat, lon = (ncgrid.variables[key] for key in ('lat_rho', 'lon_rho'))

print(chlorophyll1.ndim)
print(chlorophyll1.dims)

# Index 14 on the second axis is used as the surface level (presumably the
# top vertical layer -- confirm against the model grid). Anomalies are taken
# relative to the mean over the first axis.
surfchl = chlorophyll1[:, 14, :, :]
chl_mean = surfchl.mean(axis=0)
anomaly = surfchl - chl_mean

# Leading mode: the EOF as a correlation map, the PC with pcscaling=1.
solver = Eof(anomaly)
pc1 = solver.pcs(pcscaling=1, npcs=1)
eof1 = solver.eofsAsCorrelation(neofs=1)

# Map of EOF1.
plt.pcolormesh(lon, lat, eof1[0], cmap=plt.cm.RdBu_r)
plt.title('EOF1 expressed as Correlation')
plt.ylabel('Latitude')
plt.xlabel('Longitude')
cbar = plt.colorbar()
cbar.set_label('Correlation Coefficient', rotation=270)
plt.show()

# Time series of PC1.
plt.plot(timearray, pc1[:, 0])
plt.title('PC1 Time Series')
plt.ylabel('Normalized Units')
plt.xlabel('Year')
plt.show()

# Percentage of variance for the first six modes, with their 1-based numbers.
vF1 = solver.varianceFraction(neigs=6)
percentarray = 100 * vF1
array1 = list(range(1, 7))
eof_data = np.vstack((eof_data, data[dt_index])) except ValueError: sys.exit("Exiting: timeseries have different lengths") if args.normalize: eof_data_std = np.std(eof_data, axis=1) eof_data = eof_data.T / np.std(eof_data, axis=1) else: #transpose so time is first dimension eof_data = eof_data.T # Crete an EOF solver to do the EOF analysis. No weights # First dimension is assumed time by program... not true if timseries is of interest, print("Solving for n={} modes".format(args.eof_num)) solver = Eof(eof_data, center=False) pcs = solver.pcs(npcs=args.eof_num) eigval = solver.eigenvalues(neigs=args.eof_num) varfrac = solver.varianceFraction(neigs=args.eof_num) eofs = solver.eofs(neofs=args.eof_num) eofcorr = solver.eofsAsCorrelation(neofs=args.eof_num) eofcov = solver.eofsAsCovariance(neofs=args.eof_num) """---------------------------------Report-----------------------------------""" ### Print Select Results to file outfile = args.outfile + '.txt' print("EOF Results:", file=open(outfile, "w")) print("------------", file=open(outfile, "a")) print("File path: {}".format("/".join(filename.split('/')[:-1])), file=open(outfile, "a")) for key, filename in (files.items()): print("Files input: {}".format(filename.split('/')[-1]),
import cartopy.io.shapereader as shpreader import xarray as xr from eofs.standard import Eof import numpy as np f = xr.open_dataset('../data/pre.nc') pre = np.array(f['pre']) lat = f['lat'] lon = f['lon'] pre_lon = lon pre_lat = lat lat = np.array(lat) coslat = np.cos(np.deg2rad(lat)) wgts = np.sqrt(coslat)[..., np.newaxis] solver = Eof(pre, weights=wgts) eof = solver.eofsAsCorrelation(neofs=3) pc = solver.pcs(npcs=3, pcscaling=1) var = solver.varianceFraction() color1 = [] color2 = [] color3 = [] for i in range(1961, 2017): if pc[i - 1961, 0] >= 0: color1.append('red') elif pc[i - 1961, 0] < 0: color1.append('blue') if pc[i - 1961, 1] >= 0: color2.append('red') elif pc[i - 1961, 1] < 0: color2.append('blue') if pc[i - 1961, 2] >= 0: color3.append('red')
def main(mFilepath, xFilepath, yFilepath, window, windowFlag=True):
    """Plot the four leading EOFs and PCs of the data matrix in *mFilepath*.

    Four figures are shown one after another: the unscaled EOF maps, the PC
    time series over the whole record, and the PC time series restricted to
    the last year and to the last decade of the record.

    :param mFilepath: comma-separated file with one header row; rows are
        treated as time steps and columns as grid cells.
    :param xFilepath: longitude file; accepted for interface compatibility
        but not read by this function.
    :param yFilepath: latitude file; accepted for interface compatibility
        but not read by this function.
    :param window: window length handed to ``movingaverage``.
    :param windowFlag: when true, overlay the moving average (red) on every
        PC series.
    """
    ## load in the data matrix as a numpy array
    m = np.loadtxt(mFilepath, dtype='float', delimiter=',', skiprows=1)

    ## Create one date per row of the matrix, counting back from the last
    ## day of the study period and stored oldest first. Deriving the length
    ## from the data keeps the dates and the PCs the same length.
    base = dt.datetime(2014, 9, 21, 1, 1, 1, 1)
    n_days = m.shape[0]
    date_list = [base - dt.timedelta(days=back)
                 for back in range(n_days - 1, -1, -1)]

    ## create an EOF solver object and extract the first
    ## 4 EOFs and their associated PCs. Scaling can be
    ## applied if desired
    ## http://ajdawson.github.io/eofs/api/eofs.standard.html#eofs.standard.Eof
    solver = Eof(m)
    eofs = solver.eofs(neofs=4, eofscaling=0)
    pcs = solver.pcs(npcs=4, pcscaling=0)

    ## plot the EOFs as nongeographic data for simplicity
    fig = plt.figure(figsize=(10, 10))
    for i in range(4):
        fig.add_subplot(2, 2, i + 1)
        lab = 'EOF' + str(i + 1)
        # the flattened EOF is folded back onto the hard-coded 17 x 32 grid
        eofPlot = eofs[i].reshape(17, 32)
        plt.imshow(eofPlot, cmap=plt.cm.RdBu_r)
        plt.title('Unscaled ' + lab)
        cb = plt.colorbar(orientation='horizontal', cmap=plt.cm.RdBu_r)
        cb.set_label(lab, fontsize=12)
    plt.show()

    ## Plot the PCs as a time series over the whole record
    _plot_pc_panels(date_list, pcs, ' Time Series', window, windowFlag)

    ## Plot shorter windows of the PCs to get a more detailed
    ## pattern for comparison to the R results
    zoom_windows = (
        (dt.datetime(2013, 6, 17), dt.datetime(2014, 6, 25),
         ' Time Series (1 year)'),
        (dt.datetime(2004, 6, 17), dt.datetime(2014, 6, 17),
         ' Time Series (1 decade)'),
    )
    for start, stop, title_suffix in zoom_windows:
        rows = _rows_between(date_list, start, stop)
        _plot_pc_panels([date_list[row] for row in rows],
                        pcs[np.array(rows, dtype=int)],
                        title_suffix, window, windowFlag)


def _rows_between(dates, start, stop):
    """Return the positions in *dates* whose value lies in [start, stop)."""
    return [row for row, day in enumerate(dates) if start <= day < stop]


def _plot_pc_panels(dates, pcs, title_suffix, window, windowFlag):
    """Show each column of *pcs* as a stacked panel on a shared x axis."""
    n_modes = pcs.shape[1]
    fig = plt.figure(figsize=(16, 16))
    shared_ax = None
    for i in range(n_modes):
        ylab = 'PC' + str(i + 1)
        series = pcs[:, i]
        # sharex=None (first panel) creates an independent axis; every later
        # panel shares the x axis of the first one
        ax = fig.add_subplot(n_modes, 1, i + 1, sharex=shared_ax)
        if shared_ax is None:
            shared_ax = ax
        if i < n_modes - 1:
            # only the bottom panel carries tick labels and the axis label
            plt.setp(ax.get_xticklabels(), visible=False)
        else:
            ax.set_xlabel('Date')
        ax.plot(dates, series, color='b')
        if windowFlag:
            ax.plot(dates, movingaverage(series, window), color='r')
        ax.axhline(0, color='k')
        ax.set_title(ylab + title_suffix)
        ax.set_ylabel(ylab)
    plt.show()
cnc = camgoda(cfull_path) tnc = camgoda(tfull_path) is3d, var, vname = cnc.ExtractData(variable, box) is3d, var, vname = tnc.ExtractData(variable, box) if n == 0: nlats, nlons = cnc.data.shape boxlat = cnc.boxlat boxlon = cnc.boxlon d = np.zeros(shape=(len(dates), nlats * nlons)) d[n, :] = np.ndarray.flatten(tnc.data - cnc.data) # Compute the amplitude timeseries and EOF spatial distributions of the data array print "Computing the EOF..." EOF = Eof(d, center=removeMeans) eof = EOF.eofs(neofs=num_eofs) pca = EOF.pcs(npcs=num_eofs, pcscaling=1) varfrac = EOF.varianceFraction() print "Finished!" # Reshape F into a spatial grid eof_grid = np.reshape(eof, (eof.shape[0], nlats, nlons)) # Make the maps bmlon, bmlat = np.meshgrid(boxlon, boxlat) southern_lat = boxlat[0] northern_lat = boxlat[-1] left_lon = boxlon[0] right_lon = boxlon[-1] if 0 in boxlon[1:-2]: # if we cross the gml left_lon = boxlon[0] - 360
def calculate_correlations_and_pvalues(var_pairs,
                                       label_to_vname_to_season_to_yearlydata: dict,
                                       season_to_months: dict,
                                       region_of_interest_mask,
                                       lakes_mask=None,
                                       lats=None) -> dict:
    """
    Compute interannual Pearson correlations (and p-values) for pairs of fields.

    For every pair, label and season the yearly fields of both variables are
    stacked in ascending year order and correlated at each grid point where
    ``region_of_interest_mask`` is true. For the special pair
    ``("hles_snow", "lake_ice_fraction")`` each point of the first variable is
    instead correlated with the leading principal component of the second
    variable over the lake points.

    :param var_pairs: iterable of 2-element sequences of variable names
    :param label_to_vname_to_season_to_yearlydata: {label: {vname: {season: {year: field}}}}
    :param season_to_months: only its keys (the season names) are used
    :param region_of_interest_mask: boolean array with the shape of one yearly field
    :param lakes_mask: lake points; required for the hles/ice pair
    :param lats: latitudes in degrees, same shape as the masks, needed for
        weighting of eof solver; required for the hles/ice pair
    :return: {(vname1, vname2): {label: {season: [corr, pvalue]}}}}, where corr
        and pvalue are arrays masked outside the region of interest
    :raises ValueError: if the hles/ice pair is requested without ``lakes_mask``
        or ``lats``
    """
    res = {}

    # the region does not depend on pair/label/season, so locate it once
    region_positions = np.where(region_of_interest_mask.flatten())[0]
    outside_region = ~region_of_interest_mask

    for pair in var_pairs:
        pair = tuple(pair)
        res[pair] = {}

        for label, vname_to_season_to_yearlydata in label_to_vname_to_season_to_yearlydata.items():
            res[pair][label] = {}

            for season in season_to_months:
                v1_dict = vname_to_season_to_yearlydata[pair[0]][season]
                v2_dict = vname_to_season_to_yearlydata[pair[1]][season]

                # the years of the first variable drive the ordering of both
                years_sorted = sorted(v1_dict)
                v1 = np.array([v1_dict[y] for y in years_sorted])
                v2 = np.array([v2_dict[y] for y in years_sorted])

                # (year, flattened grid point)
                v1 = v1.reshape((v1.shape[0], -1))
                v2 = v2.reshape((v2.shape[0], -1))

                # defaults outside the region: r = 0, p = 1
                r = np.zeros(v1.shape[1])
                p = np.ones_like(r)

                if pair == ("hles_snow", "lake_ice_fraction"):
                    # for hles and ice fraction get the eof of the ice and correlate;
                    # v2 is assumed to be the lake_ice_fraction
                    pc1_ice = _lake_ice_pc1(v2, lakes_mask, lats, label)
                    for i in region_positions:
                        r[i], p[i] = pearsonr(v1[:, i], pc1_ice)
                else:
                    for i in region_positions:
                        r[i], p[i] = pearsonr(v1[:, i], v2[:, i])

                r = np.ma.masked_where(outside_region, r.reshape(region_of_interest_mask.shape))
                p = np.ma.masked_where(outside_region, p.reshape(region_of_interest_mask.shape))

                res[pair][label][season] = [r, p]

    return res


def _lake_ice_pc1(v_lake_ice, lakes_mask, lats, label):
    """Return the leading PC of the lake-ice anomalies over the lake points."""
    if lakes_mask is None or lats is None:
        raise ValueError("lakes_mask and lats are required for the "
                         "('hles_snow', 'lake_ice_fraction') pair")

    positions_lakes = np.where(lakes_mask.flatten())[0]

    # calculate anomalies over the lake points only
    v_lake_ice = v_lake_ice[:, positions_lakes]
    v_lake_ice = v_lake_ice - v_lake_ice.mean(axis=0)

    # One weight per lake point. The data are (year, point), so the weights
    # must stay 1-D: a trailing length-1 axis would broadcast against the
    # point axis and collapse every weight to that of the first point.
    weights = np.sqrt(np.cos(np.deg2rad(lats.flatten()[positions_lakes])))

    solver = Eof(v_lake_ice, weights=weights)
    print(label, solver.varianceFraction(neigs=10))

    # NOTE(review): the sign of an EOF/PC pair is arbitrary, so the sign of
    # the resulting correlations is too (the p-values are unaffected). Fix a
    # sign convention here if the sign of r is interpreted downstream.
    return solver.pcs(npcs=1)[:, 0]
# Open both netCDF files and wrap them as xarray datasets.
a = Dataset(filename1, mode='r')
b = Dataset(filename2, mode='r')
dataset1 = xr.open_dataset(xr.backends.NetCDF4DataStore(a))
dataset2 = xr.open_dataset(xr.backends.NetCDF4DataStore(b))

# Transpose the first data set, standardise it along axis 0 (subtract the
# mean, divide by the standard deviation) and keep the plain array.
sinData = dataset1['data'].T
sinData = ((sinData - sinData.mean(axis=0)) / sinData.std(axis=0)).values
bullseyeData = dataset2['data']

#%% EOF analysis
solver = Eof(sinData)
eigenvalues = solver.eigenvalues()       # eigenvalue of every mode
EOFs = solver.eofs(eofscaling=0)         # unscaled EOFs
EOFs_reg = solver.eofsAsCorrelation()    # EOFs as correlation b/w PCs & orig data
PCs = solver.pcs(pcscaling=1)            # PCs, requested with pcscaling=1

# Percentage of variance explained per mode (one decimal) and the number of
# PCs chosen by cumSUM for the 90 threshold.
VarExplain = np.round(100 * solver.varianceFraction(), 1)
numPCs2Keep = cumSUM(VarExplain, 90)

# First two EOFs, each scaled by the square root of its eigenvalue.
EOF1, EOF2 = (EOFs[k, :] * np.sqrt(eigenvalues[k]) for k in range(2))

# Correlation-form EOFs and the PCs of the same two modes.
EOF1_reg, EOF2_reg = EOFs_reg[0, :], EOFs_reg[1, :]
stdPC1, stdPC2 = PCs[:, 0], PCs[:, 1]

# Alt method of getting EOF 1 by regressing PC on data
#EOF1_reg = np.expand_dims(stdPC1,0) @ sinData
'/home/bock/Documents/tesis/datos/ncep2_atlsur_2009_2015.nc') clim_nc = dat['curl'].groupby('time.month').mean('time').sel( lat=slice(-20, -40), lon=slice(-64, -22)) coslat = np.cos(np.deg2rad(clim_nc.lat.values)).clip(0., 1.) wgts = np.sqrt(coslat)[..., np.newaxis] solver = Eof(clim_nc.values, weights=wgts) var = solver.varianceFraction() plt.figure(1) plt.bar(np.arange(0, len(var), 1), var * 100) plt.show() plt.close() n = input('Cuantos PC extraer: ') n = int(n) eof = solver.eofs(neofs=n, eofscaling=2) pc = solver.pcs(npcs=n, pcscaling=1) vf = var[:n] fig, ax = plotterchico(clim_nc.lat, clim_nc.lon, eof[0] * 1e7, cm.GMT_no_green, np.arange(-0.5, 0.51, .1), '', '10$^{-7}$ Pa/m') plt.savefig('/home/bock/Documents/tesis/resultados/figs/eof1_ncep.png', bbox_inches='tight') plt.show() fig, ax = plotterchico(clim_nc.lat, clim_nc.lon, eof[1] * 1e7, cm.GMT_no_green, np.arange(-0.5, 0.51, .1), '', '10$^{-7}$ Pa/m') plt.savefig('/home/bock/Documents/tesis/resultados/figs/eof2_ncep.png', bbox_inches='tight') plt.show() dat1 = xarray.open_dataset( '/home/bock/Documents/tesis/datos/cfsr_atlsur_2009_2015.nc')