def eof_computation(var, varunits, lat, lon):
    #----------------------------------------------------------------------------------------
    print('____________________________________________________________________________________________________________________')
    print('Computing the EOFs and PCs')
    #----------------------------------------------------------------------------------------
    # EOF analysis of a data array with spatial dimensions that
    # represent latitude and longitude with weighting. In this example
    # the data array is dimensioned (ntime, nlat, nlon), and in order
    # for the latitude weights to be broadcastable to this shape, an
    # extra length-1 dimension is added to the end:
    weights_array = np.sqrt(np.cos(np.deg2rad(lat)))[:, np.newaxis]

    start = datetime.datetime.now()
    solver = Eof(var, weights=weights_array)
    end = datetime.datetime.now()
    print('EOF computation took me %s seconds' % (end - start))

    # ALL VARIANCE FRACTIONS
    varfrac = solver.varianceFraction()
    acc = np.cumsum(varfrac * 100)

    #------------------------------------------ PCs unscaled (case 0 of scaling)
    pcs_unscal0 = solver.pcs()
    #------------------------------------------ EOFs unscaled (case 0 of scaling)
    eofs_unscal0 = solver.eofs()
    #------------------------------------------ PCs scaled (case 1 of scaling)
    pcs_scal1 = solver.pcs(pcscaling=1)
    #------------------------------------------ EOFs scaled (case 2 of scaling)
    eofs_scal2 = solver.eofs(eofscaling=2)

    return solver, pcs_scal1, eofs_scal2, pcs_unscal0, eofs_unscal0, varfrac
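# --- Hypothetical usage sketch for eof_computation (not part of the original code). ---
# The grid, the synthetic data and the 'K' units string are assumptions chosen only to
# illustrate the expected (time, lat, lon) input and the returned tuple.
import numpy as np

lat = np.linspace(-90., 90., 73)
lon = np.linspace(0., 357.5, 144)
var = np.random.randn(120, lat.size, lon.size)   # synthetic anomaly field (time, lat, lon)
solver, pcs_scal1, eofs_scal2, pcs_unscal0, eofs_unscal0, varfrac = eof_computation(var, 'K', lat, lon)
print(varfrac[:3])                               # variance fraction of the three leading modes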
def calc_HadISST_residual_EOFs(histo_sy, histo_ey, run_n):
    # load the already calculated residuals
    resid_fname = get_HadISST_residuals_fname(histo_sy, histo_ey, run_n)
    # open netcdf_file
    fh = netcdf_file(resid_fname, 'r')
    lats_var = fh.variables["latitude"]
    lons_var = fh.variables["longitude"]
    attrs = fh.variables["sst"]._attributes
    mv = attrs["_FillValue"]
    var = fh.variables["sst"]
    sst_data = numpy.ma.masked_equal(var[:], mv)

    # calculate the EOFs and PCs
    # take the eofs
    coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(sst_data, center=False, weights=wgts)
    pcs = eof_solver.pcs(npcs=None)
    eofs = eof_solver.eofs(neofs=None)

    # get the output names
    out_eofs_fname = get_HadISST_residual_EOFs_fname(histo_sy, histo_ey, run_n)
    out_pcs_fname = get_HadISST_residual_PCs_fname(histo_sy, histo_ey, run_n)

    # save the eofs and pcs
    save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
    save_pcs(out_pcs_fname, pcs, attrs)
    fh.close()
def eofs_as(dat):
    A = climatologia_xarray(dat['curl']).values
    global land
    EC, WC, land = get_coasts(dat.lat, dat.lon)

    msk = np.empty(np.shape(A))
    for i in range(0, len(A[:, 0, 0])):
        msk[i, :, :] = land
    B = np.ma.array(A, mask=msk)

    from get_eddof import get_eddof
    edof = np.empty([len(dat.lat), len(dat.lon)])
    for i in range(0, len(dat.lat)):
        for j in range(0, len(dat.lon)):
            if msk[0, i, j] == False:
                edof[i, j] = get_eddof(B[:, i, j])
            else:
                edof[i, j] = np.nan
    dof = int(np.nanmean(edof))

    coslat = np.cos(np.deg2rad(dat.lat.values)).clip(0., 1.)
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(B, center=True, weights=wgts, ddof=dof)
    eof = solver.eofs(neofs=10, eofscaling=2)
    pc = solver.pcs(npcs=10, pcscaling=1)
    varfrac = solver.varianceFraction()
    eigvals = solver.eigenvalues()
    x, y = np.meshgrid(dat.lon, dat.lat)

    return eof, pc, varfrac, x, y, edof
def calc_EOF2D(anom, nplat, coslat, varcode):
    # apply sqrt cos latitude weighting
    wgts = np.sqrt(coslat)
    wgts = wgts[:, np.newaxis]

    # leading EOF
    solver = Eof(anom, weights=wgts)
    eof1 = solver.eofs(neofs=1, eofscaling=0)[0]
    if varcode == 'PSL':
        if eof1[np.where(nplat >= 68)[0][0], 0] > 0:  # PSL
            eof1 = -eof1
    elif varcode == 'Z3':
        if eof1[np.where(nplat >= 75)[0][0], 0] > 0:  # Z3
            eof1 = -eof1
    elif varcode == 'U':
        if eof1[np.where(nplat >= 60)[0][0], 0] < 0:  # U
            eof1 = -eof1

    # leading principal component
    PC1 = np.empty([anom.shape[0]])
    for itime in range(anom.shape[0]):
        PC1[itime] = np.dot(anom[itime, :, :].flatten(), eof1.flatten())

    return (eof1, PC1)
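# --- Hypothetical usage sketch for calc_EOF2D (not part of the original code). ---
# The latitude grid, array shapes and synthetic anomalies are assumptions for illustration;
# the function expects anomalies shaped (time, lat, lon), the latitude vector and cos(lat).
import numpy as np

nplat = np.linspace(20., 89., 70)                 # assumed latitude grid (degrees north)
coslat = np.cos(np.deg2rad(nplat))
anom = np.random.randn(240, nplat.size, 144)      # synthetic (time, lat, lon) anomalies
eof1, pc1 = calc_EOF2D(anom, nplat, coslat, varcode='PSL')
print(eof1.shape, pc1.shape)                      # (nlat, nlon) pattern and (ntime,) series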
def smooth(variable, window):
    from axis import Axes, TimeAxis
    from variable import Variable
    if len(variable.shape) > 1:
        raise NotImplementedError
    try:
        variable.dts
    except AttributeError:
        raise NotImplementedError
    if window % 2 == 0:
        raise NotImplementedError
    mask = np.ones(window)
    #mask[int(window/2)] = 1
    mask /= window * 1.0
    newAxes = Axes()
    newAxes['time'] = TimeAxis(variable.dts[int(window / 2):-int(window / 2)])
    return Variable(
        data=np.convolve(variable.data, mask, mode='valid'),
        axes=newAxes,
        metadata=variable.metadata)
def eof(variable):
    from eofs.standard import Eof
    wgts = np.cos(variable.lats * np.pi / 180) ** 0.5
    solver = Eof(variable.data, weights=wgts[:, None])
    eof1 = solver.eofs(eofscaling=2, neofs=1)
    print(solver.varianceFraction(neigs=1)[0] * 100, '%')
    output = variable[0].empty()
    output.data = eof1[0]
    return output
def reconstruct_data(arr, neofs=16):
    if isinstance(neofs, int):
        neofs = [neofs]
    solver = Eof(arr, center=False)
    for n in neofs:
        reconstructed = solver.reconstructedField(neofs=n)
        pcs = solver.pcs(npcs=n)
        eofs = solver.eofs(neofs=n)
        yield reconstructed, pcs, eofs
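# --- Hypothetical usage sketch for the reconstruct_data generator (not part of the original code). ---
# `field` and its shape are assumptions; the generator yields one (reconstruction, PCs, EOFs)
# triple for each requested truncation level.
import numpy as np

field = np.random.randn(120, 500)                 # synthetic (time, space) data
for recon, pcs, eofs in reconstruct_data(field, neofs=[4, 8, 16]):
    rmse = np.sqrt(np.mean((field - recon) ** 2))
    print(pcs.shape[1], 'EOFs -> reconstruction RMSE', round(float(rmse), 3))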
def create_monthly_intvar(run_type, ref_start, ref_end, n_pcs=22, run_n=400):
    # load in the PCs and EOFs
    histo_sy = 1899
    histo_ey = 2010
    # monthly_pc_fname = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
    # monthly_pcs = load_data(monthly_pc_fname)
    # monthly_eof_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
    # monthly_eofs = load_sst_data(monthly_eof_fname, "sst")
    monthly_residuals_fname = get_HadISST_monthly_residuals_fname(
        histo_sy, histo_ey, run_n)

    # open netcdf_file
    fh = netcdf_file(monthly_residuals_fname, 'r')
    attrs = fh.variables["sst"]._attributes
    mv = attrs["_FillValue"]
    var = fh.variables["sst"]
    monthly_residuals = numpy.ma.masked_equal(var[:], mv)

    # weights for reconstruction / projection: sqrt(cos(lat)), clipped to [0, 1]
    coslat = numpy.cos(
        numpy.deg2rad(numpy.arange(89.5, -90.5, -1))).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(monthly_residuals, center=False, weights=wgts)
    monthly_pcs = eof_solver.pcs(npcs=n_pcs)
    monthly_eofs = eof_solver.eofs(neofs=n_pcs)

    # get the explanation of variance and calculate the scalar from it
    M = 1.0 / numpy.sum(eof_solver.varianceFraction(neigs=n_pcs))

    # get the number of months to predict the PCs for and create the storage
    histo_sy, histo_ey, rcp_sy, rcp_ey = get_start_end_periods()
    n_mnths = 12 * (rcp_ey - histo_sy)
    predicted_pcs = numpy.zeros([n_mnths + 12, n_pcs], "f")

    # fit an AR process to the first ~20 pcs
    for pc in range(0, n_pcs):
        # create the model
        arn = ARN(monthly_pcs[:, pc].squeeze())
        # fit the model to the data
        res = arn.fit()
        arp = res.k_ar
        # create a timeseries of predicted values
        predicted_pcs[:, pc] = M * arn.predict(res.params, noise='all',
                                               dynamic=True, start=arp,
                                               end=n_mnths + arp + 11)

    # reconstruct the field and return
    monthly_intvar = reconstruct_field(predicted_pcs, monthly_eofs[:n_pcs],
                                       n_pcs, wgts)
    return monthly_intvar
def eof(in_bands):
    data = np.array([in_bands[i].data for i in range(len(in_bands))])
    # take eof over time dimension
    solver = Eof(data)
    eof1 = solver.eofs(neofs=1)[0, :]
    cube = in_bands[0].copy()
    cube.data = eof1
    pc1 = solver.pcs(pcscaling=1, npcs=1)[:, 0]
    var_frac = solver.varianceFraction(neigs=1)[0]
    return cube, pc1, var_frac
def calculate_IOBM(data, lats, lons, times, t_units, calendar):
    """Calculate the Indian Ocean basin mode as the first EOF over the region
    20S-20N, 40E-110E. See Yang et al (2007) doi:10.1029/2006GL028571"""
    data[np.abs(data) > 1e3] = np.nan
    annual_cycle_removed = remove_annual_cycle(data, times, t_units, calendar)

    lat_min, lat_max = -20, 20
    lon_min, lon_max = 40, 110
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    IO_SST = annual_cycle_removed[:, lat_mask, :][:, :, lon_mask]

    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(IO_SST, weights=wgts)
    IOBM = solver.pcs(npcs=1, pcscaling=1).flatten()
    EOF1 = solver.eofs(neofs=1)[0, :, :]
    if np.nanmean(EOF1) < 0:
        IOBM = -IOBM
    IOBM = (IOBM - np.mean(IOBM)) / np.std(IOBM)
    return IOBM
def calculate_IPO(data, lats, lons, times, t_units, calendar):
    """Calculate the Inter-decadal Pacific Oscillation index,
    calculated as the first EOF of SST from 60S to 60N over the Pacific."""
    data[np.abs(data) > 1e3] = np.nan  # set unreasonably high values to NaN
    annual_cycle_removed = remove_annual_cycle(data, times, t_units, calendar)

    lat_min, lat_max = -60, 60
    lon_min, lon_max = 120, 270
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    Pacific_SST = annual_cycle_removed[:, lat_mask, :][:, :, lon_mask]

    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(Pacific_SST, weights=wgts)
    IPO = solver.pcs(npcs=1, pcscaling=1).flatten()
    EOF1 = solver.eofs(neofs=1)[0, :, :]
    if np.nanmean(EOF1) < 0:
        IPO = -IPO
    IPO = (IPO - np.mean(IPO)) / np.std(IPO)
    return IPO
def calculate_PDO(data, lats, lons, times, t_units, calendar):
    """Calculate the Pacific Decadal Oscillation index as the first PC of SST
    between 20N and 70N. See Newman et al (2016) doi:10.1175/JCLI-D-15-0508.1"""
    data[np.abs(data) > 1e3] = np.nan  # set unreasonably high values to NaN
    global_mean_removed = data - global_mean(data, lats).reshape(times.shape[0], 1, 1)
    annual_cycle_removed = remove_annual_cycle(global_mean_removed, times, t_units, calendar)

    lat_min, lat_max = 20, 70
    lon_min, lon_max = 120, 270
    lat_mask = (lats >= lat_min) & (lats <= lat_max)
    lon_mask = (lons >= lon_min) & (lons <= lon_max)
    N_Pacific_SST = annual_cycle_removed[:, lat_mask, :][:, :, lon_mask]

    coslat = np.cos(np.deg2rad(lats[lat_mask]))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(N_Pacific_SST, weights=wgts)
    EOF1 = solver.eofs(neofs=1)[0, :, :]
    PDO = solver.pcs(npcs=1, pcscaling=1).flatten()
    if np.nanmean(EOF1[:, lons[lon_mask] > 210]) < 0:
        PDO = -PDO
    PDO = (PDO - np.mean(PDO)) / np.std(PDO)
    return PDO
def calc_HadISST_monthly_residual_EOFs(histo_sy, histo_ey, ref_start, ref_end, run_n, n_eofs=22):
    # load the already calculated residuals
    resid_fname = get_HadISST_monthly_residuals_fname(histo_sy, histo_ey, run_n)
    # note that we don't have to subtract the annual cycle any more as the
    # residuals are with respect to a smoothed version of the monthly ssts
    resid_mon_fh = netcdf_file(resid_fname, 'r')
    sst_var = resid_mon_fh.variables["sst"]
    lats_var = resid_mon_fh.variables["latitude"]
    lons_var = resid_mon_fh.variables["longitude"]
    attrs = sst_var._attributes
    mv = attrs["_FillValue"]
    ssts = numpy.array(sst_var[:])
    sst_resids = numpy.ma.masked_less(ssts, -1000)

    # calculate the EOFs and PCs
    # take the eofs
    coslat = numpy.cos(numpy.deg2rad(lats_var[:])).clip(0., 1.)
    wgts = numpy.sqrt(coslat)[..., numpy.newaxis]
    eof_solver = Eof(sst_resids, center=True, weights=wgts)
    pcs = eof_solver.pcs(npcs=n_eofs)
    eofs = eof_solver.eofs(neofs=n_eofs)
    varfrac = eof_solver.varianceFraction(neigs=n_eofs)
    evs = eof_solver.eigenvalues(neigs=n_eofs)
    evs = evs.reshape([1, evs.shape[0]])
    print(evs.shape)

    # get the output names
    out_eofs_fname = get_HadISST_monthly_residual_EOFs_fname(histo_sy, histo_ey, run_n)
    out_pcs_fname = get_HadISST_monthly_residual_PCs_fname(histo_sy, histo_ey, run_n)
    out_evs_fname = get_HadISST_monthly_residual_EVs_fname(histo_sy, histo_ey, run_n)

    # save the eofs and pcs
    save_3d_file(out_eofs_fname, eofs, attrs, lats_var, lons_var)
    out_pcs = pcs.reshape([pcs.shape[0], 1, pcs.shape[1]])
    save_pcs(out_pcs_fname, out_pcs, attrs)
    save_eigenvalues(out_evs_fname, evs, attrs)
    resid_mon_fh.close()
def PCA(self, field_name):
    start_interv = self.start_pca
    end_interv = self.end_pca
    observationPeriod = 'data_' + str(start_interv) + '_to_' + str(end_interv)
    modelData = np.load(self.directory_data + '/' + field_name + '_' + observationPeriod + '.npy')

    # Velocity is a 3D vector and needs to be reshaped before the PCA
    if 'Velocity' in field_name:
        modelData = np.reshape(
            modelData,
            (modelData.shape[0], modelData.shape[1] * modelData.shape[2]),
            order='F')

    # Standardise the data with mean 0
    meanData = np.nanmean(modelData, 0)
    stdData = np.nanstd(modelData)
    modelDataScaled = (modelData - meanData) / stdData

    # PCA solver
    solver = Eof(modelDataScaled)
    # Principal Components time-series
    pcs = solver.pcs()
    # Projection
    eof = solver.eofs()
    # Cumulative variance
    varianceCumulative = np.cumsum(solver.varianceFraction())

    np.save(self.directory_data + '/' + 'pcs_' + field_name + '_' + observationPeriod, pcs)
    np.save(self.directory_data + '/' + 'eofs_' + field_name + '_' + observationPeriod, eof)
    np.save(self.directory_data + '/' + 'varCumulative_' + field_name + '_' + observationPeriod, varianceCumulative)
    np.save(self.directory_data + '/' + 'mean_' + field_name + '_' + observationPeriod, meanData)
    np.save(self.directory_data + '/' + 'std_' + field_name + '_' + observationPeriod, stdData)
# Open the file
cnc = camgoda(cfull_path)
tnc = camgoda(tfull_path)
is3d, var, vname = cnc.ExtractData(variable, box)
is3d, var, vname = tnc.ExtractData(variable, box)
if n == 0:
    nlats, nlons = cnc.data.shape
    boxlat = cnc.boxlat
    boxlon = cnc.boxlon
    d = np.zeros(shape=(len(dates), nlats * nlons))
d[n, :] = np.ndarray.flatten(tnc.data - cnc.data)

# Compute the amplitude timeseries and EOF spatial distributions of the data array
print("Computing the EOF...")
EOF = Eof(d, center=removeMeans)
eof = EOF.eofs(neofs=num_eofs)
pca = EOF.pcs(npcs=num_eofs, pcscaling=1)
varfrac = EOF.varianceFraction()
print("Finished!")

# Reshape eof into a spatial grid
eof_grid = np.reshape(eof, (eof.shape[0], nlats, nlons))

# Make the maps
bmlon, bmlat = np.meshgrid(boxlon, boxlat)
southern_lat = boxlat[0]
northern_lat = boxlat[-1]
left_lon = boxlon[0]
right_lon = boxlon[-1]
if 0 in boxlon[1:-2]:  # if we cross the gml
    left_lon = boxlon[0] - 360
x0 = surfanom  # for the surface quantities, use the surface temperature anomaly
# acos = acos[:, :n_alts]
corrco = np.empty_like(acos[0])
for i in range(acos[0].shape[0]):
    corrco[i] = np.corrcoef(x0, acos[:, i])[1, 0]

cico, regrco, _, _ = npl.linearregre_coeff(x0, acos)
regrcoef[(cco2, conam, 'R')] = corrco
regrcoef[(cco2, conam, 'c')] = cico
regrcoef[(cco2, conam, 'm')] = regrco

regrcoef['surfmean'] = np.mean(surftemps)
regrcoef['amean'] = atm_anom_mean
regrcoef['eof0'] = solver_anom.eofs(eofscaling=1)[0]
regrcoef['eof1'] = solver_anom.eofs(eofscaling=1)[1]

pickle.dump(regrcoef, open(cart_out_rep + 'regrcoef_v3.p', 'wb'))

# for conam in ['acoeff', 'bcoeff']:
#     fig = plt.figure()
#     for ialt, col in zip(range(n_alts), npl.color_set(n_alts)):
#         plt.plot(np.abs(regrcoef[(cco2, conam, 'R')][:, ialt]), alts, color=col)
#     # plt.xlim(-0.02, 0.02)
#     plt.title(conam + ' - rcorr')
#     fig.savefig(cartou + '{}_rcorr.pdf'.format(conam))

# the scalar products between the temp anomalies and the first eof of the temperature profile
dotprods = np.array([
    np.dot(te - atm_anom_mean,
if args.normalize:
    eof_data_std = np.std(eof_data, axis=1)
    eof_data = eof_data.T / np.std(eof_data, axis=1)
else:
    # transpose so time is first dimension
    eof_data = eof_data.T

# Create an EOF solver to do the EOF analysis. No weights.
# First dimension is assumed to be time by the program... not true if a timeseries is of interest.
print("Solving for n={} modes".format(args.eof_num))
solver = Eof(eof_data, center=False)
pcs = solver.pcs(npcs=args.eof_num)
eigval = solver.eigenvalues(neigs=args.eof_num)
varfrac = solver.varianceFraction(neigs=args.eof_num)
eofs = solver.eofs(neofs=args.eof_num)
eofcorr = solver.eofsAsCorrelation(neofs=args.eof_num)
eofcov = solver.eofsAsCovariance(neofs=args.eof_num)

"""---------------------------------Report-----------------------------------"""
### Print select results to file
outfile = args.outfile + '.txt'
print("EOF Results:", file=open(outfile, "w"))
print("------------", file=open(outfile, "a"))
print("File path: {}".format("/".join(filename.split('/')[:-1])), file=open(outfile, "a"))
for key, filename in files.items():
    print("Files input: {}".format(filename.split('/')[-1]), file=open(outfile, "a"))
print("\n\n", file=open(outfile, "a"))
    axis=1)  # add weighting function (because of the latitude)
atemp_era5 = signal.detrend(hgt500_era5, axis=0)  # linearly detrend 500 hPa geopotential height data
atemp_era5_pre = np.zeros((nt * ny, nlat, nlon))
for iy in np.arange(ny):
    atemp_era5_pre[iy * nt:iy * nt + nt] = atemp_era5[iy] * weightf[None, :, :]

### We did not use an n-day moving average as some other studies do, partly because the original goal
### for us (Jiacheng & Zhuo) is to evaluate GEFS v12 reforecasts, where the reforecast length is usually
### 16 days, which makes it impossible to apply an n-day moving average or other time filtering methods.
### 4 EOFs may already filter some noisy signals.
### To be consistent with other researchers, one can add n-day moving average code above.

# EOF analysis
solver = Eof(atemp_era5_pre, center=True)
pcs = solver.pcs()
mid_eig = solver.eigenvalues()
mid_eofs = solver.eofs()
eofs = solver.eofs()

### Print explained variance when using 4 EOFs
# var_explained_era5 = np.cumsum(mid_eig) / np.sum(np.sum(mid_eig))
# print(var_explained_era5[3])  # 0.5316330300316366

reconstruction_era5 = solver.reconstructedField(noef)  # using 4 leading EOFs to reconstruct the hgt500 field

### The Kmeans method needs a 2-D data format: number of days x horizontal fields
atemp_era5_post = np.zeros((ny * nt, nlat * nlon))
for i in np.arange(ny * nt):
    atemp_era5_post[i] = (reconstruction_era5[i]).flatten()
# loop over seasons, select data and perform boxplot
SAM_erai = np.zeros([5, 36])
SAM_s4 = np.zeros([5, len(hgt_s4.realiz.values)])
eof_erai = np.zeros([5, len(hgt_erai.latitude.values)])
eof_s4 = np.zeros([5, len(hgt_s4.latitude.values)])
sign_s4 = np.array([1, 1, 1, 1, -1])
sign_erai = np.array([1, -1, -1, -1, 1])
for i in np.arange(0, 5):
    aux = hgt_erai['z'].resample(time='QS-' + lmonth[i]).mean(dim='time', skipna=True)
    mes = datetime.datetime.strptime(lmonth[i], '%b').month
    aux = aux.sel(time=np.logical_and(aux['time.month'] == mes,
                                      aux['time.year'] != 2002))
    X_zm = aux.mean(dim='longitude')
    X_an = X_zm - X_zm.mean(dim='time')
    solver = Eof(X_an.values)
    pcs = solver.pcs(npcs=1, pcscaling=1)
    eof_erai[i, :] = solver.eofs(neofs=1)[0, :]
    SAM_erai[i, :] = sign_erai[i] * pcs[:, 0]

    hgt_s4_smean = np.nanmean(np.nanmean(hgt_s4.z.values[i:i + 3, :, :, :], axis=0), axis=2)
    hgt_s4_smean = hgt_s4_smean - np.nanmean(hgt_s4_smean, axis=0)
    solver = Eof(hgt_s4_smean)
    pcs = solver.pcs(npcs=1, pcscaling=1)
    eof_s4[i, :] = solver.eofs(neofs=1)[0, :]
    SAM_s4[i, :] = sign_s4[i] * pcs[:, 0]

time = np.concatenate([np.arange(1981, 2002), np.arange(2003, 2018)])
ds = xr.Dataset({'SAM_index': xr.DataArray(SAM_erai, coords=[('season', season), ('year', time)])})
ds.to_netcdf(RUTA + 'fogt/SAM_index_erai.nc4')
ds1 = xr.Dataset({'SAM_index': xr.DataArray(SAM_s4, coords=[('season', season), ('realiz', np.arange(SAM_s4.shape[1]))])})
curl_cfsr_clim = curl_cfsr.groupby('time.month').mean('time')

from eofs.standard import Eof

# ncep
coslat = np.cos(np.deg2rad(curl_ncep_clim.lat.values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(curl_ncep_clim.values, weights=wgts)
var = solver.varianceFraction()
plt.bar(np.arange(0, len(var), 1), var * 100)
plt.show()
n = 1
eof_ncep = solver.eofs(neofs=n, eofscaling=2)
pc_ncep = solver.pcs(npcs=n, pcscaling=1)
vf_ncep = var[:n]

# cfsr
coslat = np.cos(np.deg2rad(curl_cfsr_clim.lat.values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(curl_cfsr_clim.values, weights=wgts)
var = solver.varianceFraction()
plt.bar(np.arange(0, len(var), 1), var * 100)
plt.show()
n = 1
eof_cfsr = solver.eofs(neofs=n, eofscaling=2)
def main(mFilepath, xFilepath, yFilepath, window, windowFlag=True):
    ## load in the data matrix as a numpy array
    m = np.loadtxt(mFilepath, dtype='float', delimiter=',', skiprows=1)
    # lon = np.loadtxt(xFilepath, dtype='float', delimiter=',', skiprows=1)
    # lat = np.loadtxt(yFilepath, dtype='float', delimiter=',', skiprows=1)
    # time = np.arange('1958-01-01', '2014-09-22', dtype='datetime64')
    # years = range(1958, 2014)

    ## Create a list of dates spanning the study period
    base = dt.datetime(2014, 9, 21, 1, 1, 1, 1)
    dates = [base - dt.timedelta(days=x) for x in range(0, 20718)]
    date_list = [item for item in reversed(dates)]

    ## attempted to read in the raw data, but was struggling with
    ## the array dimensions
    # ncFiles = os.listdir(workspace)
    # slpList, lonList, latList, timeList = [], [], [], []
    # for fileIn in ncFiles:
    #     ncIn = Dataset(os.path.join(workspace, fileIn), 'r')
    #     slpList.append(ncIn.variables['slp'][:]/100)
    #     lonList.append(ncIn.variables['lon'][:])
    #     latList.append(ncIn.variables['lat'][:])
    #     timeList.append(ncIn.variables['time'][:])
    #     ncIn.close()
    # slp = np.array(slpList)
    # print(slp)
    # print(slp.shape)

    ## create an EOF solver object and extract the first
    ## 4 EOFs and their associated PCs. Scaling can be
    ## applied if desired
    ## http://ajdawson.github.io/eofs/api/eofs.standard.html#eofs.standard.Eof
    solver = Eof(m)
    eofs = solver.eofs(neofs=4, eofscaling=0)
    pcs = solver.pcs(npcs=4, pcscaling=0)
    # lon, lat = np.meshgrid(lon, lat)

    ## plot the EOFs as nongeographic data for simplicity
    fig = plt.figure(figsize=(10, 10))
    for i in range(4):
        ax = fig.add_subplot(2, 2, i + 1)
        lab = 'EOF' + str(i + 1)
        main = 'Unscaled ' + lab
        eofPlot = eofs[i, ].reshape(17, 32)
        plt.imshow(eofPlot, cmap=plt.cm.RdBu_r)
        plt.title(main)
        cb = plt.colorbar(orientation='horizontal', cmap=plt.cm.RdBu_r)
        cb.set_label(lab, fontsize=12)

    ## Basemap failure below; something with the y cell size went wrong
    # bm = Basemap(projection='cyl', llcrnrlat=16.17951, urcrnrlat=68.48459,
    #              llcrnrlon=-176.0393, urcrnrlon=-98.07901, resolution='c')
    # # bm.contourf(x, y, eof1.squeeze(), clevs, cmap=plt.cm.RdBu_r)
    # bm.drawcoastlines()
    # bm.drawstates()
    # im = bm.pcolormesh(lon, lat, eofPlot, cmap=plt.cm.RdBu_r, latlon=True)
    # # bm.fillcontinents(color='coral', lake_color='aqua')
    # bm.drawparallels(np.arange(-90., 91., 15.))
    # bm.drawmeridians(np.arange(-180., 181., 30.))
    # # bm.drawmapboundary(fill_color='aqua')
    # cb = plt.colorbar(orientation='horizontal')
    # cb.set_label(lab, fontsize=12)
    # plt.title(main, fontsize=16)
    # plt.show()
    plt.show()

    ## Plot the PCs as a time series
    fig = plt.figure(figsize=(16, 16))
    for i in range(4):
        ylab = 'PC' + str(i + 1)
        title = ylab + ' Time Series'
        pcPlot = pcs[:, i]
        if i == 0:
            theAx = fig.add_subplot(4, 1, i + 1)
            plt.setp(theAx.get_xticklabels(), visible=False)
            theAx.set_xlabel('')
        if i > 0 and i < 3:
            ax = fig.add_subplot(4, 1, i + 1, sharex=theAx)
            plt.setp(ax.get_xticklabels(), visible=False)
        if i == 3:
            ax = fig.add_subplot(4, 1, i + 1, sharex=theAx)
            plt.xlabel('Date')
        plt.plot(date_list, pcPlot, color='b')
        if windowFlag:
            plt.plot(date_list, movingaverage(pcPlot, window), color='r', linestyle='-')
        plt.axhline(0, color='k')
        plt.title(title)
        plt.ylabel(ylab)
    plt.show()

    ## Subset the dates to the last year of the dataset
    short_date = [item for item in date_list
                  if item >= dt.datetime(2013, 6, 17) and item < dt.datetime(2014, 6, 25)]
    indices = [date_list.index(item) for item in short_date]
    fig = plt.figure(figsize=(16, 16))
    ## Plot out the last year of the PCs to get a more detailed
    ## pattern for comparison to the R results
    for i in range(4):
        ylab = 'PC' + str(i + 1)
        title = ylab + ' Time Series (1 year)'
        pcPlot = pcs[np.array(indices), i]
        if i == 0:
            theAx = fig.add_subplot(4, 1, i + 1)
            plt.setp(theAx.get_xticklabels(), visible=False)
            theAx.set_xlabel('')
        if i > 0 and i < 3:
            ax = fig.add_subplot(4, 1, i + 1, sharex=theAx)
            plt.setp(ax.get_xticklabels(), visible=False)
        if i == 3:
            ax = fig.add_subplot(4, 1, i + 1, sharex=theAx)
            plt.xlabel('Date')
        plt.plot(short_date, pcPlot, color='b')
        if windowFlag:
            plt.plot(short_date, movingaverage(pcPlot, window), color='r')
        plt.axhline(0, color='k')
        plt.title(title)
        plt.ylabel(ylab)
    plt.show()

    ## Subset the dates to the last decade of the dataset
    decade = [item for item in date_list
              if item >= dt.datetime(2004, 6, 17) and item < dt.datetime(2014, 6, 17)]
    decadeIndices = [date_list.index(item) for item in decade]
    fig = plt.figure(figsize=(16, 16))
    ## Plot out the last decade of the PCs to get a more detailed
    ## pattern for comparison to the R results
    for i in range(4):
        ylab = 'PC' + str(i + 1)
        title = ylab + ' Time Series (1 decade)'
        pcPlot = pcs[np.array(decadeIndices), i]
        if i == 0:
            theAx = fig.add_subplot(4, 1, i + 1)
            plt.setp(theAx.get_xticklabels(), visible=False)
            theAx.set_xlabel('')
        if i > 0 and i < 3:
            ax = fig.add_subplot(4, 1, i + 1, sharex=theAx)
            plt.setp(ax.get_xticklabels(), visible=False)
        if i == 3:
            ax = fig.add_subplot(4, 1, i + 1, sharex=theAx)
            plt.xlabel('Date')
        plt.plot(decade, pcPlot, color='b')
        if windowFlag:
            plt.plot(decade, movingaverage(pcPlot, window), color='r')
        plt.axhline(0, color='k')
        plt.title(title)
        plt.ylabel(ylab)
    plt.show()
def calculate_correlations_and_pvalues(var_pairs, label_to_vname_to_season_to_yearlydata: dict,
                                       season_to_months: dict, region_of_interest_mask,
                                       lakes_mask=None, lats=None) -> dict:
    """
    :param var_pairs:
    :param label_to_vname_to_season_to_yearlydata:
    :param lats: needed for weighting of the eof solver
    :return: {(vname1, vname2): {label: {season: [corr, pvalue]}}}
    """
    res = {}
    for pair in var_pairs:
        pair = tuple(pair)
        res[pair] = {}
        for label in label_to_vname_to_season_to_yearlydata:
            res[pair][label] = {}
            for season in season_to_months:
                years_sorted = sorted(label_to_vname_to_season_to_yearlydata[label][pair[0]][season])

                v1_dict, v2_dict = [label_to_vname_to_season_to_yearlydata[label][pair[vi]][season]
                                    for vi in range(2)]
                v1 = np.array([v1_dict[y] for y in years_sorted])
                v2 = np.array([v2_dict[y] for y in years_sorted])

                r = np.zeros(v1.shape[1:]).flatten()
                p = np.ones_like(r).flatten()

                v1 = v1.reshape((v1.shape[0], -1))
                v2 = v2.reshape((v2.shape[0], -1))

                # for hles and ice fraction get the eof of the ice and correlate
                if pair == ("hles_snow", "lake_ice_fraction"):
                    # assume that v2 is the lake_ice_fraction
                    v_lake_ice = v2

                    positions_hles_region = np.where(region_of_interest_mask.flatten())[0]
                    positions_lakes = np.where(lakes_mask.flatten())[0]

                    v_lake_ice = v_lake_ice[:, positions_lakes]

                    # calculate anomalies
                    v_lake_ice = v_lake_ice - v_lake_ice.mean(axis=0)

                    weights = np.cos(np.deg2rad(lats.flatten()[positions_lakes])) ** 0.5
                    solver = Eof(v_lake_ice, weights=weights[..., np.newaxis])
                    print(label, solver.varianceFraction(neigs=10))

                    # use the module of the PC1 to make sure it has physical meaning
                    pc1_ice = solver.pcs(npcs=1)[:, 0]

                    # debug: plot eof
                    eofs = solver.eofs(neofs=1)
                    eof_2d = np.zeros_like(lats).flatten()
                    eof_2d[positions_lakes] = eofs[:, 0] * pc1_ice
                    eof_2d = eof_2d.reshape(lats.shape)

                    plt.figure()
                    im = plt.pcolormesh(eof_2d.T)
                    plt.colorbar(im)
                    plt.show()

                    if True:
                        raise Exception

                    # print(positions)
                    for i in positions_hles_region:
                        r[i], p[i] = pearsonr(v1[:, i], pc1_ice)
                else:
                    positions = np.where(region_of_interest_mask.flatten())
                    # print(positions)
                    for i in positions[0]:
                        r[i], p[i] = pearsonr(v1[:, i], v2[:, i])

                r.shape = region_of_interest_mask.shape
                p.shape = region_of_interest_mask.shape

                r = np.ma.masked_where(~region_of_interest_mask, r)
                p = np.ma.masked_where(~region_of_interest_mask, p)

                res[pair][label][season] = [r, p]
    return res
if not os.path.exists(cartou):
    os.mkdir(cartou)

temps = [atm_pt[(atm, 'temp')][:n_alts] for atm in allatms]
temps = np.stack(temps)
temps_anom = np.stack([
    atm_pt[(atm, 'temp')][:n_alts] - np.mean(atm_pt[(atm, 'temp')][:n_alts])
    for atm in allatms
])
atm_anom_mean = np.mean(temps_anom, axis=0)

solver = Eof(temps)
solver_anom = Eof(temps_anom)

fig = plt.figure()
for i, eo in enumerate(solver.eofs()):
    plt.plot(eo, alts, label=i)
plt.legend()
fig.savefig(cartou + 'eofs_temps.pdf')

fig = plt.figure()
for i, eo in enumerate(solver_anom.eofs()):
    plt.plot(eo, alts, label=i)
plt.legend()
fig.savefig(cartou + 'eofs_temps_anom.pdf')

fig = plt.figure()
plt.bar(np.arange(6), solver.eigenvalues())
fig.savefig(cartou + 'eigenvalues_temps.pdf')

fig = plt.figure()
dat = xarray.open_dataset(
    '/home/bock/Documents/tesis/datos/ncep2_atlsur_2009_2015.nc')
clim_nc = dat['curl'].groupby('time.month').mean('time').sel(
    lat=slice(-20, -40), lon=slice(-64, -22))
coslat = np.cos(np.deg2rad(clim_nc.lat.values)).clip(0., 1.)
wgts = np.sqrt(coslat)[..., np.newaxis]
solver = Eof(clim_nc.values, weights=wgts)
var = solver.varianceFraction()

plt.figure(1)
plt.bar(np.arange(0, len(var), 1), var * 100)
plt.show()
plt.close()

n = input('How many PCs to extract: ')
n = int(n)
eof = solver.eofs(neofs=n, eofscaling=2)
pc = solver.pcs(npcs=n, pcscaling=1)
vf = var[:n]

fig, ax = plotterchico(clim_nc.lat, clim_nc.lon, eof[0] * 1e7, cm.GMT_no_green,
                       np.arange(-0.5, 0.51, .1), '', '10$^{-7}$ Pa/m')
plt.savefig('/home/bock/Documents/tesis/resultados/figs/eof1_ncep.png',
            bbox_inches='tight')
plt.show()

fig, ax = plotterchico(clim_nc.lat, clim_nc.lon, eof[1] * 1e7, cm.GMT_no_green,
                       np.arange(-0.5, 0.51, .1), '', '10$^{-7}$ Pa/m')
plt.savefig('/home/bock/Documents/tesis/resultados/figs/eof2_ncep.png',
            bbox_inches='tight')
plt.show()

dat1 = xarray.open_dataset(
def PCA_Analyze(name, framestart, framestop, destination, numberpca=None):
    """Completes unweighted and unscaled Principal Component Analysis on data.

    **Arguments:**

    *name*
        The complete name of the numpy array file from the directory where the
        processing program is found. Put as string (include quotes). Must be an
        npz file. Specifically, this is the numpy array file that has the
        UU, VV, WW data in it. Instead of putting the name of the numpy file
        (since there are a large number of them) input an asterisk (*).

    *framestart*
        The first frame number in the sequence of frames to be analyzed.

    *framestop*
        The last frame number in the sequence of frames to be analyzed.

    *destination*
        File location for the graph to be saved. Put in quotes.
        Example: 'out/Vertical Velocity/arrays/another/graph.png'

    **Optional keyword arguments:**

    *numberpca*
        Number of valid eigenvalues/PCAs to be calculated. Automatically set to
        determine all of them.

    **Example:**
        PCA_Analyze('../out/velocity.npz', 0, 5, '../out/mvavgtur.png', numberpca=4)
    """
    ##### Creates lists and dictionaries to be used later #####
    UU = {}
    lUU = []
    VV = {}
    lVV = []

    ##### Extracts numpy array data and puts it into the dictionaries for use in analysis #####
    for np_name in glob.glob(name):
        with np.load(np_name) as data:
            UU[re.findall(r'\d+', np_name)[-1]] = data['UU']
            VV[re.findall(r'\d+', np_name)[-1]] = data['VV']

    ##### Takes the data from the dictionaries, sorts them, and then puts them into lists.
    ##### Then turns the lists into numpy arrays. #####
    uframes = sorted(UU.keys())
    vframes = sorted(VV.keys())
    for i in uframes:
        lUU.append(UU[i])
    for i in vframes:
        lVV.append(VV[i])
    luu = np.asarray(lUU)
    lvv = np.asarray(lVV)

    ##### Puts the U and V components into one complex array with U as the real component and V as the imaginary #####
    velgrid = luu + (1.j * lvv)

    ##### PCA #####
    solver = Eof(velgrid[framestart:framestop, :, :])
    pca = solver.eofs(neofs=numberpca)
    eigen = solver.eigenvalues(neigs=numberpca)
    pca = np.array(pca)
    eigen = np.array([eigen])
    intermed = eigen[0].shape
    length = intermed[0]
    print(length)

    ##### Graphs each PCA #####
    c = 0
    for i in range(length):
        UU = pca.real[i, :, :]
        VV = pca.imag[i, :, :]
        eig = np.array_str(eigen[0][i])
        (a, b) = pca[0].shape
        y, x = np.mgrid[0:a, 0:b]
        plt.figure()
        plt.streamplot(x, y, UU * -1., VV * -1., cmap='nipy_spectral')
        plt.suptitle("PCA Analysis. Associated Percent Variance: ")
        plt.title(eig, fontsize=10)
        plt.savefig(destination % i)
        plt.close()
        c += 1
data = load_regional(filenames, ny, nx)
data = np.ma.masked_values(data, 2.e+20)
print("data loaded", data.shape)

# Set up info
plt.set_cmap('RdBu')
neofs = 5
nens = data.shape[0]
nwanted = 57

solver = Eof(data)
print('set up EOF solver')
pcs = solver.pcs(npcs=neofs, pcscaling=1)
eofs = solver.eofs(neofs=neofs)
varfrac = solver.varianceFraction(neigs=neofs)
print('calculated EOFs')

print('printing EOFs')
for i in range(neofs):
    print('EOF', i)
    plt.clf()
    plot_region_pnw(eofs[i, :], lat_coord, lon_coord, 0, -1, 0, -1, 'EOF' + str(i), varfrac[i])

print("plotting histograms of PCs")
for i in range(3):
    plt.clf()
    plt.hist(pcs[:, i], 200, range=(-4, 4), density=True, alpha=0.4, label='pc' + str(i))
    plt.ylim([0, .6])
    plt.savefig(output_dir + '/histogram_pc' + str(i) + '.png')

print("plotting mean and stdev of ensemble")
plot_region_pnw(data[:].mean(0), lat_coord, lon_coord, 0, -1, 0, -1, 'mean', data.mean())
filename1 = 'sine_wave_data1.nc'
filename2 = '2D_bulls_eyes.nc'
a = Dataset(filename1, mode='r')
b = Dataset(filename2, mode='r')
dataset1 = xr.open_dataset(xr.backends.NetCDF4DataStore(a))
dataset2 = xr.open_dataset(xr.backends.NetCDF4DataStore(b))

sinData = dataset1['data'].T
sinData = (sinData - sinData.mean(axis=0)) / sinData.std(axis=0)
sinData = sinData.values
bullseyeData = dataset2['data']

#%% EOF analysis
solver = Eof(sinData)
eigenvalues = solver.eigenvalues()       # Get eigenvalues
EOFs = solver.eofs(eofscaling=0)         # Get EOFs
EOFs_reg = solver.eofsAsCorrelation()    # Get EOFs as correlation b/w PCs & orig data
PCs = solver.pcs(pcscaling=1)            # Get PCs

# Get variance explained and # of PCs
VarExplain = np.round(solver.varianceFraction() * 100, 1)
numPCs2Keep = cumSUM(VarExplain, 90)

# Calculate EOFs
EOF1 = EOFs[0, :] * np.sqrt(eigenvalues[0])  # Get EOF 1 & scale it
EOF2 = EOFs[1, :] * np.sqrt(eigenvalues[1])  # Get EOF 2 & scale it
EOF1_reg = EOFs_reg[0, :]
EOF2_reg = EOFs_reg[1, :]
stdPC1 = PCs[:, 0]
stdPC2 = PCs[:, 1]
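# --- cumSUM is called above but not defined in this snippet. ---
# A minimal sketch of a helper with the assumed behaviour (how many leading modes are
# needed to reach a target cumulative percentage of explained variance); the body is an
# assumption, not the original implementation.
import numpy as np

def cumSUM(var_explained_percent, threshold):
    # cumulative explained variance, already expressed in percent
    cumulative = np.cumsum(var_explained_percent)
    # first mode at/above the threshold, converted to a 1-based count
    return int(np.argmax(cumulative >= threshold) + 1)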