Example #1
import numpy as np
from sklearn.metrics.pairwise import haversine_distances
# proc is a project-local utility module assumed to be importable


def calc_matrices(invar, lon, lat, return_all=False):
    """
    Calculate correlation, covariance, and distance matrices in preparation
    for clustering.

    Parameters
    ----------
    invar : ARRAY (Time x Lat x Lon)
        Input variable
    lon : ARRAY (Lon)
        Longitudes
    lat : ARRAY (Lat)
        Latitudes
    return_all : BOOL, optional
        Set to True to also return non-NaN points, indices, and coordinates. The default is False.

    Returns
    -------
    srho: ARRAY [npts x npts]
        Correlation Matrix
    scov: ARRAY [npts x npts]
        Covariance Matrix
    sdist: ARRAY [npts x npts]
        Distance Matrix

    """

    # ---------------------
    # Remove All NaN Points
    # ---------------------
    ntime, nlat, nlon = invar.shape
    varrs = invar.reshape(ntime, nlat * nlon)
    okdata, knan, okpts = proc.find_nan(varrs, 0)
    npts = okdata.shape[1]

    # ---------------------------------------------
    # Calculate Correlation and Covariance Matrices
    # ---------------------------------------------
    # Rows of okdata.T are each point's time series; stacking the data with
    # itself yields a (2*npts x 2*npts) matrix, so keep only the top-left block
    srho = np.corrcoef(okdata.T, okdata.T)
    scov = np.cov(okdata.T, okdata.T)
    srho = srho[:npts, :npts]
    scov = scov[:npts, :npts]

    # --------------------------
    # Calculate Distance Matrix
    # --------------------------
    lonmesh, latmesh = np.meshgrid(lon, lat)
    coords = np.vstack([lonmesh.flatten(), latmesh.flatten()]).T
    coords = coords[okpts, :]
    coords1 = coords.copy()
    coords2 = np.zeros(coords1.shape)
    coords2[:, 0] = np.radians(coords1[:, 1])  # First point is latitude
    coords2[:, 1] = np.radians(coords1[:, 0])  # Second Point is Longitude
    sdist = haversine_distances(coords2, coords2) * 6371

    if return_all:
        return srho, scov, sdist, okdata, okpts, coords2
    return srho, scov, sdist
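All of these examples lean on proc.find_nan to strip NaN points before doing matrix math. proc is a project-local module whose source is not shown, so here is a minimal sketch of what it appears to do, inferred purely from how okdata, knan, and okpts are used in these examples (an assumption, not the actual implementation):

import numpy as np

def find_nan_sketch(data, dim):
    """Hypothetical stand-in for proc.find_nan (inferred from usage).
    Checks for NaNs along axis `dim` and drops the offending slices of
    the other axis. Returns the cleaned array plus masks of the dropped
    (knan) and kept (okpts) entries."""
    if data.ndim == 1:  # 1D case, as in Example #9
        knan = np.isnan(data)
        return data[~knan], knan, ~knan
    knan = np.isnan(data).any(axis=dim)  # one flag per slice of the other axis
    okpts = ~knan
    okdata = data[:, okpts] if dim == 0 else data[okpts, :]
    return okdata, knan, okpts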
Example #2
for idx in tqdm(range(nint)): # Portions copied from script below
    
    # Read out the values
    clusterin    = clusts[idx] 
    uncertin     = uncert[idx]
    s_in         = s_all[idx]
    s_byclust_in = s_by_clust[idx]
    countin      = count[idx]
    rngin        = rngs[idx]
    rempts_in    = rempts[idx]
    
    # Recover clusterout for silhouette plotting
    remmask = rempts_in.copy()
    remmask[~np.isnan(remmask)] = np.nan # Convert all removed points to NaN
    remmask[np.isnan(rempts_in)] = 1
    clusterout,knan,okpts = proc.find_nan((clusterin*remmask).flatten(),0)
        
    # Ugly fix, but not sure why s_in size sometimes doesn't match clusterin (non-NaN)
    if len(clusterout) != len(s_in):
        print("Mismatch between clusterout (%i) and s_in (%i) for interval %i" % (len(clusterout),
                                                                                  len(s_in), idx))
        clusterout, knan, okpts = proc.find_nan(clusterin.flatten(), 0)
    
    # Make Silhouette Map
    silmap = np.zeros(nlat5*nlon5)*np.nan
    silmap[okpts] = s_in
    silmap = silmap.reshape(nlat5,nlon5)
    silmap_all[idx,:,:] = silmap

    # Reassign clusters
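The remmask trick above is easy to misread, so here is a toy check (values hypothetical): non-NaN entries of rempts_in mark removed points, and the mask flips them to NaN while setting retained points to 1, so multiplying clusterin by the mask NaNs out the removed points before find_nan drops them.

import numpy as np

rempts_in = np.array([np.nan, 3.0, np.nan, 7.0])  # points 1 and 3 were removed
remmask = rempts_in.copy()
remmask[~np.isnan(remmask)] = np.nan  # removed points -> NaN
remmask[np.isnan(rempts_in)] = 1      # retained points -> 1
print(remmask)                        # [ 1. nan  1. nan]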
Example #3
import numpy as np
from scipy.cluster.hierarchy import fcluster, linkage
from scipy.spatial.distance import squareform
from sklearn.metrics.pairwise import haversine_distances
# proc and monte_carlo_cluster are project-local utilities assumed importable


def cluster_ssh(sla, lat, lon, nclusters, distthres=3000, returnall=False):

    # Remove All NaN Points
    ntime, nlat, nlon = sla.shape
    slars = sla.reshape(ntime, nlat * nlon)
    okdata, knan, okpts = proc.find_nan(slars, 0)
    npts = okdata.shape[1]

    # ---------------------------------------------
    # Calculate Correlation and Covariance Matrices
    # ---------------------------------------------
    srho = np.corrcoef(okdata.T, okdata.T)
    scov = np.cov(okdata.T, okdata.T)
    srho = srho[:npts, :npts]
    scov = scov[:npts, :npts]

    # --------------------------
    # Calculate Distance Matrix
    # --------------------------
    lonmesh, latmesh = np.meshgrid(lon, lat)
    coords = np.vstack([lonmesh.flatten(), latmesh.flatten()]).T
    coords = coords[okpts, :]
    coords1 = coords.copy()
    coords2 = np.zeros(coords1.shape)
    coords2[:, 0] = np.radians(coords1[:, 1])  # First point is latitude
    coords2[:, 1] = np.radians(coords1[:, 0])  # Second Point is Longitude
    sdist = haversine_distances(coords2, coords2) * 6371

    # --------------------------
    # Combine the Matrices
    # --------------------------
    a_fac = np.sqrt(-distthres / (2 * np.log(0.5)))  # Chosen so expterm = 0.5 when distance = distthres
    expterm = np.exp(-sdist / (2 * a_fac**2))
    distance_matrix = 1 - expterm * srho

    # --------------------------
    # Do Clustering (scipy)
    # --------------------------
    cdist = squareform(distance_matrix, checks=False)
    linked = linkage(cdist, 'weighted')
    clusterout = fcluster(linked, nclusters, criterion='maxclust')

    # -------------------------
    # Calculate the uncertainty
    # -------------------------
    uncertout = np.zeros(clusterout.shape)
    uncertsig = np.zeros(clusterout.shape)
    for i in range(len(clusterout)):
        covpt = scov[i, :]   # Covariances between point i and all others
        cid = clusterout[i]  # Cluster assigned to point i
        covin = covpt[np.where(clusterout == cid)]
        covout = covpt[np.where(clusterout != cid)]
        uncertpt = np.mean(covin) / np.mean(covout)
        uncertout[i] = uncertpt

        # --------------------------------------------
        # Monte-Carlo Analysis to compute significance
        # --------------------------------------------
        sigpt = monte_carlo_cluster(uncertpt,
                                    covpt,
                                    len(covin),
                                    mciter=1000,
                                    p=0.05,
                                    tails=2)
        uncertsig[i] = sigpt

    # Apply rules from Thompson and Merrifield (Do this later)
    # if uncert > 2, set to 2
    # if uncert <0.5, set to 0
    #uncertout[uncertout>2]   = 2
    #uncertout[uncertout<0.5] = 0

    # -----------------------
    # Replace into full array
    # -----------------------
    clustered = np.zeros(nlat * nlon) * np.nan
    clustered[okpts] = clusterout
    clustered = clustered.reshape(nlat, nlon)
    cluster_count = []
    for i in range(nclusters):
        cid = i + 1
        cnt = (clustered == cid).sum()
        cluster_count.append(cnt)
        print("Found %i points in cluster %i" % (cnt, cid))
    uncert = np.zeros(nlat * nlon) * np.nan
    uncert[okpts] = uncertout
    uncert = uncert.reshape(nlat, nlon)

    if returnall:
        return clustered, uncert, uncertsig, cluster_count, srho, scov, sdist, distance_matrix
    return clustered, uncert, uncertsig, cluster_count
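One detail in cluster_ssh worth verifying is the choice of a_fac: it is derived so that the exponential damping term equals exactly 0.5 when the separation equals distthres. A quick numerical check:

import numpy as np

distthres = 3000.0
a_fac = np.sqrt(-distthres / (2 * np.log(0.5)))
print(np.exp(-distthres / (2 * a_fac**2)))  # -> 0.5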
Example #4
fig, ax = plt.subplots(
    1, 1, subplot_kw={'projection': ccrs.PlateCarree(central_longitude=0)})
ax = slutil.add_coast_grid(ax)
pcm = ax.pcolormesh(lon5,
                    lat5,
                    ptmap,
                    cmap='bone',
                    transform=ccrs.PlateCarree(),
                    alpha=0.88)
fig.colorbar(pcm, ax=ax)
ax.set_title("Removed Zero Points")

# ---
# Visualize Filter Transfer Function
# ---
okdata, knan, okpts = proc.find_nan(slars, 0)
npts5 = okdata.shape[1]
lpdata = okdata.copy()
rawdata = ssha.reshape(ntimer, nlat5 * nlon5)[:, okpts]
lpspec, rawspec, p24, filtxfer, fig, ax = slutil.check_lpfilter(
    rawdata, lpdata, xtk[1], M, tw, dt=24 * 3600 * 30)
plt.savefig("%sFilter_Transfer_%imonLP_%ibandavg_%s.png" %
            (expdir, tw, M, expname),
            dpi=200)

# ---
Example #6
    manom, invar = proc.calc_clim(invar, 0, returnts=1)  # Climatology, time in axis 0
    vanom = invar - manom[None, :, :, :]
    vanom = vanom.reshape(nmon, nlat, nlon)  # Reshape back to [time x lat x lon]

    # Flip latitude
    if lat[0] > lat[-1]:  # If latitude is decreasing...
        lat = np.flip(lat)
        vanom = np.flip(vanom, axis=1)

    # Detrend the variable (taken from calc_amv_hadisst.py)
    # ----------------------------------------------------
    start = time.time()
    indata = vanom.reshape(nmon, nlat * nlon).T  # Transpose to [Space x Time]
    okdata, knan, okpts = proc.find_nan(indata, 1)
    x = np.arange(0, nmon, 1)
    if detrend == 0:
        # Compute global weighted average
        glomean = proc.area_avg(vanom.transpose(2, 1, 0), [0, 360, -90, 90], lon, lat, 1)

        # Regress back to the original data to get the global component
        beta, b = proc.regress_2d(glomean, okdata)

        # Subtract this from the original data
        okdt = okdata - beta[:, None]
    else:
        # Polynomial Detrend
        okdt, model = proc.detrend_poly(x, okdata, detrend)
        if debug:
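The detrend == 0 branch above removes a global-mean component by regression. A self-contained toy version of that regress-and-subtract pattern, with an inline least-squares slope standing in for proc.regress_2d (whose exact return values are assumed here) and the full fitted global component subtracted:

import numpy as np

rng = np.random.default_rng(0)
glomean = rng.standard_normal(120)                       # global-mean series [time]
okdata = 0.5 * glomean + rng.standard_normal((50, 120))  # 50 points x 120 months
beta = (okdata @ glomean) / (glomean @ glomean)          # slope per point
okdt = okdata - beta[:, None] * glomean[None, :]         # remove global component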
Example #7
sstnp = sstnp.transpose(0, 3, 1, 2)  # [model x time x lon x lat]
sstrs = sstnp.reshape(4, nmon, pointsize)


# Preallocate
autocorr_all = np.ones((4, 12, len(lags), nlonr, nlatr)) * np.nan
for e in range(4):

    enstime = time.time()

    # Get ensemble [time x space]
    sstens = sstrs[e, :, :]

    # Isolate non-NaN points, checking along dimension zero
    oksst, knan, okpts = proc.find_nan(sstens, 0)

    # Get dimensions and reshape time to [month x year x space]
    timedim, spacedim = oksst.shape
    oksst = np.reshape(oksst, (int(timedim / 12), 12, spacedim))
    oksst = np.transpose(oksst, (1, 0, 2))

    # Preallocate and loop for each month...
    autocorrm = np.ones((12, len(lags), spacedim)) * np.nan

    # Loop over the months
    for m in range(12):

        # Calculate autocorrelation for that month
        autocorrm[m, :, :] = proc.calc_lagcovar_nd(oksst, oksst, lags, m + 1, 0)
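The reshape/transpose pair above regroups the time axis month-major; a quick sanity check with a toy array:

import numpy as np

T, S = 24, 3                                       # two years, three points
x = np.arange(T * S).reshape(T, S)                 # [time x space]
xm = x.reshape(T // 12, 12, S).transpose(1, 0, 2)  # [month x year x space]
assert np.array_equal(xm[0, :, 0], x[::12, 0])     # every January at point 0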
 
Example #8
        color='k')
ax.plot(slapt_ss, label="Estimated Cycle", color='red')
ax.plot(slaptrm.squeeze(), label="Deseasonalized Data", color='b')

#%% Check to see if the dataset has strong seasonal cycle

# fig,ax = plt.subplots(1,1)
# pcm = ax.pcolormesh(lon5,lat5,sla_5deg[0,:,:])
# ax.scatter([])

#%% 4.5) Remove NaN points and Examine Low pass filter

slars = sla_lp.reshape(ntime, nlat5 * nlon5)

# Locate only non-Nan points
okdata, knan, okpts = proc.find_nan(slars, 0)
npts = okdata.shape[1]

# Quick check low pass filter transfer function
lpdata = okdata.copy()
rawdata = sla_5deg.reshape(ntime, nlat5 * nlon5)[:, okpts]
lpspec = []
rawspec = []
npts5 = okdata.shape[1]
for i in tqdm(range(npts5)):
    X_spec, freq, _ = tbx.bandavg_autospec(rawdata[:, i], dt, M, .05)
    X_lpspec, _, _ = tbx.bandavg_autospec(lpdata[:, i], dt, M, .05)
    lpspec.append(X_lpspec)
    rawspec.append(X_spec)
lpspec = np.array(lpspec)
rawspec = np.array(rawspec)
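With the raw and low-passed spectra in hand, the filter transfer function can be estimated as their ratio at each frequency, which is presumably what slutil.check_lpfilter in Example #4 visualizes (an assumption; that helper's internals are not shown):

filtxfer = lpspec / rawspec           # [npts x freq]
mean_xfer = np.nanmean(filtxfer, 0)   # mean response across points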
Example #9
bbox_NA = [-80, 0, 0, 65]

regions = ("SPG", "STG", "TRO", "NAT")
bboxes = (bbox_SP, bbox_ST, bbox_TR, bbox_NA)

stdboxes = []
stdval = []
for r in range(4):

    # Select data from region
    bbox = bboxes[r]
    datr, _, _ = proc.sel_region(fstd, clon1, clat, bbox)

    # Make Data 1D and remove NaN points
    datr = datr.flatten()
    datr, _, _ = proc.find_nan(datr, 0)

    # Append data
    stdboxes.append(datr)
    stdval.append(np.mean(datr))

# Create Plot
fig, ax = plt.subplots(1, 1, figsize=(6, 4))
plt.style.use("seaborn")
bp = ax.boxplot(stdboxes, notch=0, sym='',
                labels=regions)  # sym='' so outlier points are not shown
ax.set_xlabel("Region")
ax.set_ylabel("Standard Deviation")
ax.set_title("$\sigma_{Forcing}$ CESM1LE 42-member Average")
ax.set_ylim([0, 0.8])
plt.savefig("%sForcing_Stdev_Regional.png" % (outpathfig), dpi=200)
Example #10
print("Data loaded in %.2fs" % (time.time() - st))

#%% Calculate ENSO

# Apply Area Weight
_, Y = np.meshgrid(lon, lat)
wgt = np.sqrt(np.cos(np.radians(Y)))  # [lat x lon]
ts = ts * wgt[None, :, :]

# Reshape for ENSO calculations
ntime, nlat, nlon = ts.shape
ts = ts.reshape(ntime, nlat * nlon)  # [time x space]
ts = ts.T  #[space x time]

# Remove NaN points
okdata, knan, okpts = proc.find_nan(ts, 1)  # Find Non-Nan Points
oksize = okdata.shape[0]

# Calculate monthly anomalies
okdata = okdata.reshape(oksize, int(ntime / 12), 12)  # [space x yr x mon]
manom = okdata.mean(1)
tsanom = okdata - manom[:, None, :]
#tsanom = tsanom.reshape(nlat*nlon,ntime)
nyr = tsanom.shape[1]

eofall = np.zeros((nlat * nlon, 12, pcrem)) * np.nan  # [space x month x pc]
pcall = np.zeros((nyr, 12, pcrem)) * np.nan  # [year x month x pc]
varexpall = np.zeros((12, pcrem)) * np.nan  #[month x pc]

# Compute EOF!!
for m in range(12):
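A note on the sqrt(cos(lat)) weighting applied before the EOF: covariance entries involve products of pairs of weighted points, so weighting the data by sqrt(cos(lat)) gives each covariance entry the full cos(lat) area factor.

import numpy as np

lat = 60.0
w = np.sqrt(np.cos(np.radians(lat)))
print(w * w)  # -> 0.5, i.e. cos(60 degrees)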
Example #11
        np.save(datpath + "AVISO_GMSL_%s_%s.npy" % (start, end), gmslrem)
else:
    print("GMSL Not Removed")

# ---------------------
#%% Remove Seasonal Cycle
# ---------------------
if rem_seas:
    print("Removing Seasonal Cycle!")
    # Copy and reshape data
    sshc = ssha.copy()
    ntime = sshc.shape[0]
    sshc = sshc.reshape(ntime, nlat5 * nlon5)

    # Get non nan points
    okdata, knan, okpts = proc.find_nan(sshc, 0)

    # Remove seasonal cycle
    x, E = proc.remove_ss_sinusoid(okdata, semiannual=True)
    ssh_ss = E @ x
    okdata_ds = okdata - ssh_ss

    # Replace into data
    sshnew = np.zeros(sshc.shape) * np.nan
    sshnew[:, okpts] = okdata_ds
    sshnew = sshnew.reshape(ntime, nlat5, nlon5)

    sshss = np.zeros(sshc.shape) * np.nan
    sshss[:, okpts] = ssh_ss
    sshss = sshss.reshape(ntime, nlat5, nlon5)
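proc.remove_ss_sinusoid returns coefficients x and a design matrix E such that E @ x reconstructs the seasonal cycle. A minimal sketch of what such a fit might look like with annual plus semiannual harmonics and monthly data (hypothetical; the project's implementation may differ):

import numpy as np

t = np.arange(okdata.shape[0])
E = np.column_stack([
    np.ones(t.size),
    np.sin(2 * np.pi * t / 12), np.cos(2 * np.pi * t / 12),  # annual
    np.sin(4 * np.pi * t / 12), np.cos(4 * np.pi * t / 12),  # semiannual
])
x, *_ = np.linalg.lstsq(E, okdata, rcond=None)  # least-squares coefficients
ssh_ss = E @ x                                  # fitted seasonal cycle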
Example #13
#%% Perform Linear Detrend on SST and annually averaged sst (Detrend First)

usedtfunction = 1  # Set to 1 to use the new detrending function

if usedtfunction == 1:

    start = time.time()
    dt_hsst, ymodall, _, _ = proc.detrend_dim(hsstnew, 2)
    print("Detrended in %.2fs" % (time.time() - start))
else:

    # Reshape to [Time x Space] and remove NaN Points
    start = time.time()
    hsstnew = np.reshape(hsstnew, (360 * 180, 1176)).T
    hsstok, knan, okpts = proc.find_nan(hsstnew, 0)

    tper = np.arange(0, hsstok.shape[0])
    beta, b = proc.regress_2d(tper, hsstok)  # Perform regression

    # Detrend
    dt_hsst = hsstnew[:, okpts] - (beta[:, None] * tper + b[:, None]).T

    # Replace NaN values back into the system
    hsstall = np.zeros(hsstnew.shape) * np.nan
    hsstall[:, okpts] = dt_hsst

    # Also save the linear model
    ymodall = np.zeros(hsstnew.shape) * np.nan
    ymodall[:, okpts] = (beta[:, None] * tper + b[:, None]).T
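A quick consistency check on the manual branch: the detrended field plus the saved linear model should reconstruct the original non-NaN points (assuming find_nan returns a boolean okpts mask, as its usage here suggests):

assert np.allclose(hsstall[:, okpts] + ymodall[:, okpts], hsstok)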
Example #14
dsfirst = dsfirst - np.mean(dsfirst, axis=2)[:, :, None, :]
dsfirst = np.reshape(dsfirst, (360, 180, hsstnew.shape[2]))

# Detrend
# start= time.time()
# dtdsfirst,dsymodall,_,_ = proc.detrend_dim(dsfirst,2)
# print("Detrended in %.2fs" % (time.time()-start))


# Detrend
nlon = 360
nlat = 180
nmon = nyrs*12
start = time.time()
indata = dsfirst.reshape(nlon * nlat, nmon)
okdata, knan, okpts = proc.find_nan(indata, 1)
x = np.arange(0, nmon, 1)

if method == 0:
    # Calculate global mean SST
    glomean = okdata.mean(0)
    # Regress back to the original data to get the global component
    beta, b = proc.regress_2d(glomean, okdata)
    # Subtract this from the original data
    okdt = okdata - beta[:, None]
else:
    # Polynomial detrend (e.g., quadratic for method == 2)
    okdt, model = proc.detrend_poly(x, okdata, method)
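proc.detrend_poly is not shown; a hypothetical equivalent using numpy's polynomial module, for data shaped [points x time] as above (the project's version may differ):

import numpy as np
from numpy.polynomial import polynomial as P

def detrend_poly_sketch(x, data, order):
    """Hypothetical stand-in for proc.detrend_poly: fit an order-`order`
    polynomial to each point's time series and subtract it."""
    coefs = P.polyfit(x, data.T, order)  # data is [points x time]
    model = P.polyval(x, coefs)          # evaluate fits -> [points x time]
    return data - model, model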
    
Example #15
import numpy as np
# proc is a project-local utility module assumed to be importable


def return_ar1_model(invar, simlen):
    """
    Creates an AR1 model for the input timeseries [invar] through the following steps:
    
        1. Calculate the lag-1 correlation coefficient (R) and effective DOF
        2. Calculate the noise standard deviation sigma = sqrt[(1-R^2)*var(invar)]
        3. Integrate y(t) = R*y(t-1) + N(0,sigma) for [simlen] steps
    
    
    Inputs
    ------
    1) invar [time x lat x lon] - input variable
    2) simlen [int] - simulation length
    
    Outputs
    -------
    1) rednoisemodel [simlen x lat x lon]
    2) ar1_map [lat x lon]
    3) neff_map [lat x lon]
    
    """

    # --------------------------------
    # Part 1: Calculate AR1 and N_eff
    # --------------------------------
    # Remove NaNs
    ntime, nlat5, nlon5 = invar.shape
    invar = invar.reshape(ntime, nlat5 * nlon5)
    okdata, knan, okpts = proc.find_nan(invar, dim=0)
    npts = invar.shape[1]
    nok = okdata.shape[1]
    # Compute Lag 1 AR for each and effective DOF
    ar1 = np.zeros(nok)
    neff = np.zeros(nok)
    for i in range(nok):

        ts = okdata[:, i]
        r = np.corrcoef(ts[1:], ts[:-1])[0, 1]
        ar1[i] = r
        neff[i] = ntime * (1 - r) / (1 + r)

    # Replace into domain
    ar1_map = np.zeros(npts) * np.nan
    neff_map = np.zeros(npts) * np.nan
    ar1_map[okpts] = ar1
    neff_map[okpts] = neff
    ar1_map = ar1_map.reshape(nlat5, nlon5)
    neff_map = neff_map.reshape(nlat5, nlon5)

    # ---------------------------------------
    # Part 2: Get variance and make AR1 model
    # ---------------------------------------

    # Calculate the noise standard deviation
    invar = invar.reshape(ntime, nlat5, nlon5)
    n_sigma = np.sqrt((1 - ar1_map**2) * np.var(invar, 0))

    # Create model
    rednoisemodel = np.zeros((simlen, nlat5, nlon5))
    noisets = np.random.normal(0, 1, rednoisemodel.shape)
    noisets *= n_sigma[None, :, :]
    for i in range(1, simlen):
        rednoisemodel[i, :, :] = ar1_map * rednoisemodel[i - 1, :, :] + noisets[i, :, :]

    # ---------------------------
    # Apply landice mask to model
    # ---------------------------
    msk = invar.copy()
    msk = msk.sum(0)
    msk[~np.isnan(msk)] = 1
    rednoisemodel *= msk[None, :, :]

    vardiff = (np.var(invar, 0)) - np.var(rednoisemodel, 0)
    #print("maximum difference in variance is %f"% np.nanmax(np.abs(vardiff)))
    return rednoisemodel, ar1_map, neff_map
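A shape-level usage sketch of return_ar1_model (synthetic input; running it requires the project's proc module to be importable):

import numpy as np

sla = np.random.randn(240, 10, 20)  # synthetic [time x lat x lon]
model, ar1_map, neff_map = return_ar1_model(sla, simlen=1000)
print(model.shape, ar1_map.shape, neff_map.shape)  # (1000, 10, 20) (10, 20) (10, 20)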
Example #16
    pslglo = pslglo - pslglo.mean(0)[None, :, :]

    # Preallocate
    pcall = np.zeros((nens, nyr, N_mode))  # [ens x year x pc]
    varexpall = np.zeros((nens, N_mode))  # [ens x pc]
    eofall = np.zeros((nens, nlat, nlon, N_mode))  # [ens x lat x lon x pc]

    for e in range(nens):
        startloop = time.time()

        # Select ensemble [Space x Time]
        varens = pslnao[e, :, :]
        varglo = pslglo[e, :, :]

        # Get rid of NaN points
        okdata, knan, okpts = proc.find_nan(varens, 1)
        okdatap, knanp, okptsp = proc.find_nan(varglo, 1)

        # Perform EOF -------------
        _, pcs, varexpall[e, :] = proc.eof_simple(okdata, N_mode, 1)

        # Standardize pc before regression along the time dimension
        pcstd = np.squeeze(pcs / np.std(pcs, 0))

        # Loop for each mode... (Note: this could be vectorized for speed)
        psleofs = np.ones((192, 288, 3))

        for pcn in range(N_mode):

            # Regress back to SLP
            eofpatokp, _ = proc.regress_2d(pcstd[:, pcn], okdatap, nanwarn=0)
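A last note on the pcstd step above: standardizing the PC to unit variance before the regression means the resulting EOF patterns come out in the data's physical units per one standard deviation of the PC, making patterns comparable across modes.

import numpy as np

pc = np.random.randn(100)
pcstd = pc / np.std(pc, 0)
print(np.std(pcstd))  # -> 1.0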