def calc_matrices(invar, lon, lat, return_all=False):
    """
    Calculate correlation, covariance, and distance matrices
    in preparation for clustering.

    Parameters
    ----------
    invar : ARRAY (Time x Lat x Lon)
        Input variable
    lon : ARRAY (Lon)
        Longitudes, in degrees (converted to radians internally)
    lat : ARRAY (Lat)
        Latitudes, in degrees (converted to radians internally)
    return_all : BOOL, optional
        Set to true to also return the non-nan data, the non-nan point
        selector, and the coordinates. The default is False.

    Returns
    -------
    srho : ARRAY [npts x npts]
        Correlation Matrix
    scov : ARRAY [npts x npts]
        Covariance Matrix
    sdist : ARRAY [npts x npts]
        Distance Matrix (haversine distance scaled by 6371; presumably
        the Earth radius in km -- confirm)
    okdata : ARRAY [Time x npts]
        Only if return_all. Data at the points kept by proc.find_nan.
    okpts : ARRAY
        Only if return_all. Selector returned by proc.find_nan; it indexes
        the flattened (Lat * Lon) grid.
    coords2 : ARRAY [npts x 2]
        Only if return_all. Coordinates of the kept points in radians,
        column 0 = latitude, column 1 = longitude.
    """
    # ---------------------
    # Remove All NaN Points
    # ---------------------
    ntime, nlat, nlon = invar.shape
    varrs = invar.reshape(ntime, nlat * nlon)
    okdata, _, okpts = proc.find_nan(varrs, 0)

    # ---------------------------------------------
    # Calculate Correlation and Covariance Matrices
    # ---------------------------------------------
    # Columns of okdata are the variables (points). A single-argument call
    # already yields the npts x npts matrix; passing the data twice would
    # build a 2*npts x 2*npts matrix only to discard three quarters of it.
    # atleast_2d keeps the single-point case 2-D.
    srho = np.atleast_2d(np.corrcoef(okdata, rowvar=False))
    scov = np.atleast_2d(np.cov(okdata, rowvar=False))

    # --------------------------
    # Calculate Distance Matrix
    # --------------------------
    lonmesh, latmesh = np.meshgrid(lon, lat)
    coords = np.vstack([lonmesh.flatten(), latmesh.flatten()]).T
    coords = coords[okpts, :]

    # haversine_distances is fed [latitude, longitude] in radians, so the
    # (lon, lat) columns are swapped while converting.
    coords2 = np.zeros(coords.shape)
    coords2[:, 0] = np.radians(coords[:, 1])  # First point is latitude
    coords2[:, 1] = np.radians(coords[:, 0])  # Second point is longitude
    sdist = haversine_distances(coords2, coords2) * 6371

    if return_all:
        return srho, scov, sdist, okdata, okpts, coords2
    return srho, scov, sdist
for idx in tqdm(range(nint)): # Portions copied from script below # Read out the values clusterin = clusts[idx] uncertin = uncert[idx] s_in = s_all[idx] s_byclust_in = s_by_clust[idx] countin = count[idx] rngin = rngs[idx] rempts_in = rempts[idx] # Recover clusterout for silhouette plotting remmask = rempts_in.copy() remmask[~np.isnan(remmask)] = np.nan # Convert all removed points to NaN remmask[np.isnan(rempts_in)] = 1 clusterout,knan,okpts = proc.find_nan((clusterin*remmask).flatten(),0) # Ugly Fix, but not sure why sometimes s_in size doesnt match clusterin (non-nan) if len(clusterout) != len(s_in): print("Mismatch between clusterout (%i) and s_in (%i) for interval %i" % (len(clusterout), len(s_in),idx)) clusterout,knan,okpts = proc.find_nan((clusterin).flatten(),0) # Make Silhouette Map silmap = np.zeros(nlat5*nlon5)*np.nan silmap[okpts] = s_in silmap = silmap.reshape(nlat5,nlon5) silmap_all[idx,:,:] = silmap # Reassign clusters
def cluster_ssh(sla, lat, lon, nclusters, distthres=3000, returnall=False):
    """
    Cluster grid points by combining their correlation with a
    distance-based weight, then running hierarchical clustering.

    Parameters
    ----------
    sla : ARRAY (Time x Lat x Lon)
        Input variable
    lat : ARRAY (Lat)
        Latitudes, in degrees (converted to radians internally)
    lon : ARRAY (Lon)
        Longitudes, in degrees (converted to radians internally)
    nclusters : INT
        Maximum number of clusters (fcluster criterion 'maxclust')
    distthres : NUMERIC, optional
        Distance at which the exponential distance weight equals 0.5,
        in the same units as the distance matrix. The default is 3000.
    returnall : BOOL, optional
        Set to true to also return the intermediate matrices.
        The default is False.

    Returns
    -------
    clustered : ARRAY [Lat x Lon]
        Cluster id at each point (NaN where the point was dropped)
    uncert : ARRAY [Lat x Lon]
        Ratio mean(covariance with same-cluster points) /
        mean(covariance with other-cluster points), NaN where dropped
    uncertsig : ARRAY [npts]
        Output of monte_carlo_cluster for each kept point. NOTE: this is
        left flat (kept points only); it is not mapped back to Lat x Lon.
    cluster_count : LIST [nclusters]
        Number of points in cluster ids 1..nclusters
    srho, scov, sdist, distance_matrix : ARRAY [npts x npts]
        Only if returnall. Correlation, covariance, distance, and the
        combined matrix passed to the clustering.
    """
    # ---------------------
    # Remove All NaN Points
    # ---------------------
    ntime, nlat, nlon = sla.shape
    slars = sla.reshape(ntime, nlat * nlon)
    okdata, _, okpts = proc.find_nan(slars, 0)

    # ---------------------------------------------
    # Calculate Correlation and Covariance Matrices
    # ---------------------------------------------
    # Columns of okdata are the variables (points). A single-argument call
    # already yields the npts x npts matrix; passing the data twice would
    # build a 2*npts x 2*npts matrix only to discard three quarters of it.
    srho = np.atleast_2d(np.corrcoef(okdata, rowvar=False))
    scov = np.atleast_2d(np.cov(okdata, rowvar=False))

    # --------------------------
    # Calculate Distance Matrix
    # --------------------------
    lonmesh, latmesh = np.meshgrid(lon, lat)
    coords = np.vstack([lonmesh.flatten(), latmesh.flatten()]).T
    coords = coords[okpts, :]

    # haversine_distances is fed [latitude, longitude] in radians
    coords2 = np.zeros(coords.shape)
    coords2[:, 0] = np.radians(coords[:, 1])  # First point is latitude
    coords2[:, 1] = np.radians(coords[:, 0])  # Second point is longitude
    sdist = haversine_distances(coords2, coords2) * 6371

    # --------------------------
    # Combine the Matrices
    # --------------------------
    # a_fac is chosen so that expterm = 0.5 when sdist equals distthres
    a_fac = np.sqrt(-distthres / (2 * np.log(0.5)))
    expterm = np.exp(-sdist / (2 * a_fac**2))
    distance_matrix = 1 - expterm * srho

    # --------------------------
    # Do Clustering (scipy)
    # --------------------------
    # checks=False: convert to condensed form without validating symmetry
    # or a zero diagonal
    cdist = squareform(distance_matrix, checks=False)
    linked = linkage(cdist, 'weighted')
    clusterout = fcluster(linked, nclusters, criterion='maxclust')

    # -------------------------
    # Calculate the uncertainty
    # -------------------------
    uncertout = np.zeros(clusterout.shape)
    uncertsig = np.zeros(clusterout.shape)
    for i, cid in enumerate(clusterout):
        covpt = scov[i, :]
        same_cluster = clusterout == cid
        covin = covpt[same_cluster]
        covout = covpt[~same_cluster]
        # NOTE: when every point lands in one cluster covout is empty and
        # the ratio is NaN.
        uncertpt = np.mean(covin) / np.mean(covout)
        uncertout[i] = uncertpt

        # --------------------------------------------
        # Monte-Carlo Analysis to compute significance
        # --------------------------------------------
        uncertsig[i] = monte_carlo_cluster(uncertpt,
                                           covpt,
                                           len(covin),
                                           mciter=1000,
                                           p=0.05,
                                           tails=2)

    # The Thompson and Merrifield rules (cap uncert at 2, zero it below
    # 0.5) are intentionally not applied here; do this later on the output.

    # -----------------------
    # Replace into full array
    # -----------------------
    clustered = np.zeros(nlat * nlon) * np.nan
    clustered[okpts] = clusterout
    clustered = clustered.reshape(nlat, nlon)

    # Cluster ids from fcluster start at 1
    cluster_count = []
    for cid in range(1, nclusters + 1):
        cnt = (clustered == cid).sum()
        cluster_count.append(cnt)
        print("Found %i points in cluster %i" % (cnt, cid))

    uncert = np.zeros(nlat * nlon) * np.nan
    uncert[okpts] = uncertout
    uncert = uncert.reshape(nlat, nlon)

    if returnall:
        return (clustered, uncert, uncertsig, cluster_count, srho, scov,
                sdist, distance_matrix)
    return clustered, uncert, uncertsig, cluster_count
# Map `ptmap` on a PlateCarree projection with the project's coast/grid
# decoration (slutil.add_coast_grid), titled "Removed Zero Points".
fig, ax = plt.subplots(
    1, 1, subplot_kw={'projection': ccrs.PlateCarree(central_longitude=0)})
ax = slutil.add_coast_grid(ax)
pcm = ax.pcolormesh(lon5,
                    lat5,
                    ptmap,
                    cmap='bone',
                    transform=ccrs.PlateCarree(),
                    alpha=0.88)
fig.colorbar(pcm, ax=ax)
ax.set_title("Removed Zero Points")

# ---
# Visualize Filter Transfer Function
# ---
# Keep only the points retained by proc.find_nan; the same selector
# (okpts) is applied to the flattened ssha so that rawdata and lpdata
# cover the same points.
okdata, knan, okpts = proc.find_nan(slars, 0)
npts5 = okdata.shape[1]
lpdata = okdata.copy()
rawdata = ssha.reshape(ntimer, nlat5 * nlon5)[:, okpts]
# NOTE: fig and ax are rebound here to whatever check_lpfilter returns, so
# the map figure created above is no longer reachable through these names.
# dt = 24 * 3600 * 30 is presumably the number of seconds in a 30-day
# month -- TODO confirm against slutil.check_lpfilter.
lpspec, rawspec, p24, filtxfer, fig, ax = slutil.check_lpfilter(
    rawdata, lpdata, xtk[1], M, tw, dt=24 * 3600 * 30)
# plt.savefig writes the current pyplot figure; presumably that is the one
# produced by check_lpfilter -- confirm.
plt.savefig("%sFilter_Transfer_%imonLP_%ibandavg_%s.png" %
            (expdir, tw, M, expname),
            dpi=200)

# ---
# Map `ptmap` on a PlateCarree projection with the project's coast/grid
# decoration (slutil.add_coast_grid), titled "Removed Zero Points".
fig, ax = plt.subplots(
    1, 1, subplot_kw={'projection': ccrs.PlateCarree(central_longitude=0)})
ax = slutil.add_coast_grid(ax)
pcm = ax.pcolormesh(lon5,
                    lat5,
                    ptmap,
                    cmap='bone',
                    transform=ccrs.PlateCarree(),
                    alpha=0.88)
fig.colorbar(pcm, ax=ax)
ax.set_title("Removed Zero Points")

# ---
# Visualize Filter Transfer Function
# ---
# Keep only the points retained by proc.find_nan; the same selector
# (okpts) is applied to the flattened ssha so that rawdata and lpdata
# cover the same points.
okdata, knan, okpts = proc.find_nan(slars, 0)
npts5 = okdata.shape[1]
lpdata = okdata.copy()
rawdata = ssha.reshape(ntimer, nlat5 * nlon5)[:, okpts]
# NOTE: fig and ax are rebound here to whatever check_lpfilter returns, so
# the map figure created above is no longer reachable through these names.
# dt = 24 * 3600 * 30 is presumably the number of seconds in a 30-day
# month -- TODO confirm against slutil.check_lpfilter.
lpspec, rawspec, p24, filtxfer, fig, ax = slutil.check_lpfilter(
    rawdata, lpdata, xtk[1], M, tw, dt=24 * 3600 * 30)
# plt.savefig writes the current pyplot figure; presumably that is the one
# produced by check_lpfilter -- confirm.
plt.savefig("%sFilter_Transfer_%imonLP_%ibandavg_%s.png" %
            (expdir, tw, M, expname),
            dpi=200)

# ---
manom, invar = proc.calc_clim( invar, 0, returnts=1) # Calculate clim with time in axis 0 vanom = invar - manom[None, :, :, :] vanom = vanom.reshape(nmon, nlat, nlon) # Reshape back to [time x lat x lon] # Flip latitude if lat[0] > lat[-1]: # If latitude is decreasing... lat = np.flip(lat) vanom = np.flip(vanom, axis=1) # Detrend the variable (taken from calc_amv_hadisst.py) # ---------------------------------------------------- start = time.time() indata = vanom.reshape(nmon, nlat * nlon).T # Transpose to [Space x Time] okdata, knan, okpts = proc.find_nan(indata, 1) x = np.arange(0, nmon, 1) if detrend == 0: # Compute global weighted average glomean = proc.area_avg(vanom.transpose(2, 1, 0), [0, 360, -90, 90], lon, lat, 1) # Regress back to the original data to get the global component beta, b = proc.regress_2d(glomean, okdata) # Subtract this from the original data okdt = okdata - beta[:, None] else: # Polynomial Detrend okdt, model = proc.detrend_poly(x, okdata, detrend) if debug:
sstnp = sstnp.transpose(0,3,1,2) #[model x time x lon x lat] sstrs = sstnp.reshape(4,nmon,pointsize) # Preallocate autocorr_all = np.ones((4,12,len(lags),nlonr,nlatr)) * np.nan for e in range(4): enstime = time.time() # Get ensemble [time x space] sstens = sstrs[e,:,:] # Isolate non-nan points, summing along dimension zero oksst,knan,okpts = proc.find_nan(sstens,0) # Get dimensions and reshape the time to [month x yr x space] timedim,spacedim = oksst.shape oksst = np.reshape(oksst,(int(timedim/12),12,spacedim)) oksst = np.transpose(oksst,(1,0,2)) # Preallocate and loop for each month... autocorrm = np.ones((12,len(lags),spacedim)) * np.nan # Loop for the months for m in range(12): # Calculate autocorrelation for that month autocorrm[m,:,:] = proc.calc_lagcovar_nd(oksst,oksst,lags,m+1,0)
color='k') ax.plot(slapt_ss, label="Estimated Cycle", color='red') ax.plot(slaptrm.squeeze(), label="Deseasonalized Data", color='b') #%% Check to see if the dataset has strong seasonal cycle # fig,ax = plt.subplots(1,1) # pcm = ax.pcolormesh(lon5,lat5,sla_5deg[0,:,:]) # ax.scatter([]) #%% 4.5) Remove NaN points and Examine Low pass filter slars = sla_lp.reshape(ntime, nlat5 * nlon5) # Locate only non-Nan points okdata, knan, okpts = proc.find_nan(slars, 0) npts = okdata.shape[1] # Quick check low pass filter transfer function lpdata = okdata.copy() rawdata = sla_5deg.reshape(ntime, nlat5 * nlon5)[:, okpts] lpspec = [] rawspec = [] npts5 = okdata.shape[1] for i in tqdm(range(npts5)): X_spec, freq, _ = tbx.bandavg_autospec(rawdata[:, i], dt, M, .05) X_lpspec, _, _ = tbx.bandavg_autospec(lpdata[:, i], dt, M, .05) lpspec.append(X_lpspec) rawspec.append(X_spec) lpspec = np.array(lpspec) rawspec = np.array(rawspec)
bbox_NA = [-80, 0, 0, 65]

# Region labels and their bounding boxes, in matching order
regions = ("SPG", "STG", "TRO", "NAT")
bboxes = (bbox_SP, bbox_ST, bbox_TR, bbox_NA)

# Collect, for each region, the non-NaN values of fstd (stdboxes) and
# their mean (stdval)
stdboxes = []
stdval = []
for bbox in bboxes:
    # Select data from region
    datr, _, _ = proc.sel_region(fstd, clon1, clat, bbox)

    # Make data 1D and remove NaN points
    datr = datr.flatten()
    datr, _, _ = proc.find_nan(datr, 0)

    # Append data
    stdboxes.append(datr)
    stdval.append(np.mean(datr))

# Create Plot
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
# NOTE(review): the style is set after the figure is created, and the
# plain "seaborn" style name is, to my knowledge, no longer shipped by
# recent matplotlib releases -- confirm against the installed version.
plt.style.use("seaborn")
# sym='' means outlier points are not drawn
bp = ax.boxplot(stdboxes, 0, '', labels=regions)
ax.set_xlabel("Region")
ax.set_ylabel("Standard Deviation")
# Raw string: "\s" is not a valid Python escape sequence, so the non-raw
# literal triggers an invalid-escape warning. The runtime text is unchanged.
ax.set_title(r"$\sigma_{Forcing}$ CESM1LE 42-member Average")
ax.set_ylim([0, 0.8])
plt.savefig("%sForcing_Stdev_Regional.png" % (outpathfig), dpi=200)
print("Data loaded in %.2fs" % (time.time() - st)) #%% Calculate ENSO # Apply Area Weight _, Y = np.meshgrid(lon, lat) wgt = np.sqrt(np.cos(np.radians(Y))) # [lat x lon] ts = ts * wgt[None, :, :] # Reshape for ENSO calculations ntime, nlat, nlon = ts.shape ts = ts.reshape(ntime, nlat * nlon) # [time x space] ts = ts.T #[space x time] # Remove NaN points okdata, knan, okpts = proc.find_nan(ts, 1) # Find Non-Nan Points oksize = okdata.shape[0] # Calcuate monthly anomalies okdata = okdata.reshape(oksize, int(ntime / 12), 12) # [space x yr x mon] manom = okdata.mean(1) tsanom = okdata - manom[:, None, :] #tsanom = tsanom.reshape(nlat*nlon,ntime) nyr = tsanom.shape[1] eofall = np.zeros((nlat * nlon, 12, pcrem)) * np.nan # [space x month x pc] pcall = np.zeros((nyr, 12, pcrem)) * np.nan # [year x month x pc] varexpall = np.zeros((12, pcrem)) * np.nan #[month x pc] # Compute EOF!! for m in range(12):
np.save(datpath + "AVISO_GMSL_%s_%s.npy" % (start, end), gmslrem) else: print("GMSL Not Removed") # --------------------- #%% Remove Seasonal Cycle # --------------------- if rem_seas: print("Removing Seasonal Cycle!") # Copy and reshape data sshc = ssha.copy() ntime = sshc.shape[0] sshc = sshc.reshape(ntime, nlat5 * nlon5) # Get non nan points okdata, knan, okpts = proc.find_nan(sshc, 0) # Remove seasonal cycle x, E = proc.remove_ss_sinusoid(okdata, semiannual=True) ssh_ss = E @ x okdata_ds = okdata - ssh_ss # Replace into data sshnew = np.zeros(sshc.shape) * np.nan sshnew[:, okpts] = okdata_ds sshnew = sshnew.reshape(ntime, nlat5, nlon5) sshss = np.zeros(sshc.shape) * np.nan sshss[:, okpts] = ssh_ss sshss = sshss.reshape(ntime, nlat5, nlon5)
for idx in tqdm(range(nint)): # Portions copied from script below # Read out the values clusterin = clusts[idx] uncertin = uncert[idx] s_in = s_all[idx] s_byclust_in = s_by_clust[idx] countin = count[idx] rngin = rngs[idx] rempts_in = rempts[idx] # Recover clusterout for silhouette plotting remmask = rempts_in.copy() remmask[~np.isnan(remmask)] = np.nan # Convert all removed points to NaN remmask[np.isnan(rempts_in)] = 1 clusterout, knan, okpts = proc.find_nan((clusterin * remmask).flatten(), 0) # Ugly Fix, but not sure why sometimes s_in size doesnt match clusterin (non-nan) if len(clusterout) != len(s_in): print( "Mismatch between clusterout (%i) and s_in (%i) for interval %i" % (len(clusterout), len(s_in), idx)) clusterout, knan, okpts = proc.find_nan((clusterin).flatten(), 0) # Make Silhouette Map silmap = np.zeros(nlat5 * nlon5) * np.nan silmap[okpts] = s_in silmap = silmap.reshape(nlat5, nlon5) silmap_all[idx, :, :] = silmap # Reassign clusters
#%% Perform Linear Detrend on SST and annually averaged sst (Detrend First) usedtfunction = 1 # Set to 1 to use the new detrending function if usedtfunction == 1: start = time.time() dt_hsst, ymodall, _, _ = proc.detrend_dim(hsstnew, 2) print("Detrended in %.2fs" % (time.time() - start)) else: # Reshape to [Time x Space] and remove NaN Points start = time.time() hsstnew = np.reshape(hsstnew, (360 * 180, 1176)).T hsstok, knan, okpts = proc.find_nan(hsstnew, 0) tper = np.arange(0, hsstok.shape[0]) beta, b = proc.regress_2d(tper, hsstok) # Perform regression # Detrend dt_hsst = hsstnew[:, okpts] - (beta[:, None] * tper + b[:, None]).T # Replace NaN vaues back into the system hsstall = np.zeros(hsstnew.shape) * np.nan hsstall[:, okpts] = dt_hsst # Also save the linear model ymodall = np.zeros(hsstnew.shape) * np.nan ymodall[:, okpts] = (beta[:, None] * tper + b[:, None]).T
# Remove the mean over axis 2 of the 4-D dsfirst array, then collapse it
# to [360 x 180 x hsstnew.shape[2]]
dsfirst = dsfirst - np.mean(dsfirst,axis=2)[:,:,None,:]
dsfirst = np.reshape(dsfirst,(360,180,hsstnew.shape[2]))

# Detrend (earlier approach, kept for reference)
# start= time.time()
# dtdsfirst,dsymodall,_,_ = proc.detrend_dim(dsfirst,2)
# print("Detrended in %.2fs" % (time.time()-start))

# Detrend
# Grid size is hard-coded (360 x 180); nmon assumes nyrs full years of 12
# months.
nlon = 360
nlat = 180
nmon = nyrs*12
start= time.time()
# Flatten to [space x time] and keep only the points retained by
# proc.find_nan
indata = dsfirst.reshape(nlon*nlat,nmon)
okdata,knan,okpts = proc.find_nan(indata,1)
x = np.arange(0,nmon,1)
if method == 0:
    # Calculate global mean SST (unweighted mean over the retained points)
    glomean = okdata.mean(0)

    # Regress back to the original data to get the global component
    beta,b=proc.regress_2d(glomean,okdata)

    # Subtract this from the original data
    # NOTE(review): this subtracts the coefficient beta itself, not
    # beta * glomean -- confirm this is intended.
    okdt = okdata - beta[:,None]
else:
    # Polynomial detrend; `method` is passed to proc.detrend_poly
    # (presumably the polynomial order -- confirm)
    okdt,model = proc.detrend_poly(x,okdata,method)
def return_ar1_model(invar, simlen):
    """
    Creates AR1 model for input timeseries [invar] thru the following steps:

        1. Calculate Lag 1 Correlation Coefficient (R) and Effective DOF
        2. Calculate the standard deviation of the noise,
           sigma = sqrt[(1-R^2)*var(invar)]
        3. Integrate y(t) = R*y(t-1) + N(0,sigma) for [simlen] steps,
           starting from y(0) = 0

    Inputs
    ------
        1) invar  [time x lat x lon] - input variable
        2) simlen [int]              - simulation length

    Outputs
    -------
        1) rednoisemodel [simlen x lat x lon] - simulated series, NaN
           wherever the time-sum of invar is NaN
        2) ar1_map       [lat x lon] - lag 1 correlation coefficient
        3) neff_map      [lat x lon] - effective DOF, ntime*(1-R)/(1+R)

    Noise is drawn with np.random.normal, so results depend on numpy's
    global random state.
    """
    # --------------------------------
    # Part 1: Calculate AR1 and N_eff
    # --------------------------------
    # Remove NaNs
    ntime, nlat5, nlon5 = invar.shape
    npts = nlat5 * nlon5
    okdata, _, okpts = proc.find_nan(invar.reshape(ntime, npts), dim=0)
    nok = okdata.shape[1]

    # Compute Lag 1 AR for each retained point, then the effective DOF
    ar1 = np.zeros(nok)
    for i in range(nok):
        ts = okdata[:, i]
        ar1[i] = np.corrcoef(ts[1:], ts[:-1])[0, 1]
    neff = ntime * (1 - ar1) / (1 + ar1)

    # Replace into domain (NaN where the point was dropped)
    ar1_map = np.zeros(npts) * np.nan
    neff_map = np.zeros(npts) * np.nan
    ar1_map[okpts] = ar1
    neff_map[okpts] = neff
    ar1_map = ar1_map.reshape(nlat5, nlon5)
    neff_map = neff_map.reshape(nlat5, nlon5)

    # ---------------------------------------
    # Part 2: Get variance and make AR1 model
    # ---------------------------------------
    # Calculate standard deviation of the noise
    n_sigma = np.sqrt((1 - ar1_map**2) * np.var(invar, 0))

    # Create model; step 0 stays at zero and each later step adds noise
    rednoisemodel = np.zeros((simlen, nlat5, nlon5))
    noisets = np.random.normal(0, 1, rednoisemodel.shape)
    noisets *= n_sigma[None, :, :]
    for i in range(1, simlen):
        rednoisemodel[i, :, :] = (ar1_map * rednoisemodel[i - 1, :, :] +
                                  noisets[i, :, :])

    # ---------------------------
    # Apply landice mask to model
    # ---------------------------
    # 1 where the time-sum of invar is a number, NaN elsewhere. This also
    # blanks step 0, which the recursion above leaves at zero.
    msk = invar.sum(0)
    msk[~np.isnan(msk)] = 1
    rednoisemodel *= msk[None, :, :]

    return rednoisemodel, ar1_map, neff_map
pslglo = pslglo - pslglo.mean(0)[None, :, :] # Preallocate pcall = np.zeros((nens, nyr, N_mode)) # [ens x year x pc] varexpall = np.zeros((nens, N_mode)) # [ens x pc] eofall = np.zeros((nens, nlat, nlon, N_mode)) # [ens x lat x lon x pc] for e in range(nens): startloop = time.time() # Select ensemble [Space x Time] varens = pslnao[e, :, :] varglo = pslglo[e, :, :] # Get rid of NaN points okdata, knan, okpts = proc.find_nan(varens, 1) okdatap, knanp, okptsp = proc.find_nan(varglo, 1) #% Perform EOF ------------- _, pcs, varexpall[e, :] = proc.eof_simple(okdata, N_mode, 1) # Standardize pc before regression along the time dimension pcstd = np.squeeze(pcs / np.std(pcs, 0)) #Loop for each mode... (NOte, can vectorize this to speed it up,.) psleofs = np.ones((192, 288, 3)) for pcn in range(N_mode): # Regress back to SLP eofpatokp, _ = proc.regress_2d(pcstd[:, pcn], okdatap, nanwarn=0)