Beispiel #1
0
def cluster_ssh(sla,
                lat,
                lon,
                nclusters,
                distthres=3000,
                returnall=False,
                absmode=0,
                distmode=0,
                uncertmode=0,
                printmsg=True,
                calcsil=False):
    """Cluster grid points hierarchically from correlation and distance.

    Parameters
    ----------
    sla : ndarray, 3-D
        Input field; its shape is unpacked as (ntime, nlat, nlon).
    lat, lon : array-like
        Grid coordinates forwarded to ``slutil.calc_matrices`` together
        with ``sla``.
    nclusters : int
        Maximum number of clusters requested from ``fcluster``
        (``criterion='maxclust'``).
    distthres : float, default 3000
        Value of the distance matrix at which the exponential weight equals
        0.5 (same units as the distances from ``slutil.calc_matrices``).
    returnall : bool, default False
        Also return the correlation, covariance, distance, and combined
        distance matrices. Ignored when ``calcsil`` is True.
    absmode : {0, 1, 2}, default 0
        0: use correlation/covariance as is; 1: use absolute values;
        2: flip the sign (anticorrelation).
    distmode : {0, 1, 2}, default 0
        0: ``1 - expterm * srho`` (distance and correlation);
        1: ``1 - expterm`` (distance only);
        2: ``1 - srho`` (correlation only).
    uncertmode : {0, 1}, default 0
        0: ratio of mean in-cluster to mean out-of-cluster covariance;
        1: the same ratio using medians. Any other value leaves the
        uncertainty at zero for every clustered point.
    printmsg : bool, default True
        Print the number of points found in each cluster.
    calcsil : bool, default False
        Compute silhouette values with ``slutil.calc_silhouette`` and
        return them instead of the default outputs.

    Returns
    -------
    clustered : ndarray (nlat, nlon)
        Cluster id per grid point, NaN where the point is not in ``okpts``.
    uncert : ndarray (nlat, nlon)
        Uncertainty ratio per grid point, NaN where not in ``okpts``.
    cluster_count : list
        Number of points in clusters 1..nclusters.
    Wk : ndarray (nclusters,)
        Sum of within-cluster pairwise distances (each pair counted once).
    s, s_bycluster
        Appended only when ``calcsil`` is True.
    srho, scov, sdist, distance_matrix
        Appended only when ``returnall`` is True and ``calcsil`` is False.

    Raises
    ------
    ValueError
        If ``distmode`` is not 0, 1, or 2.
    """
    if distmode not in (0, 1, 2):
        raise ValueError("distmode must be 0, 1, or 2 (got %r)" % (distmode,))

    # --------------------------------------------------------
    # Calculate Correlation, Covariance, and Distance Matrices
    # --------------------------------------------------------
    _, nlat, nlon = sla.shape
    # Use the coordinates supplied by the caller, not module-level globals.
    srho, scov, sdist, _, okpts, _ = slutil.calc_matrices(
        sla, lon, lat, return_all=True)

    # -------------------------------
    # Apply corrections based on mode
    # -------------------------------
    if absmode == 1:  # Take absolute value of correlation/covariance
        scov = np.abs(scov)
        srho = np.abs(srho)
    elif absmode == 2:  # Use anticorrelation
        scov *= -1
        srho *= -1

    # --------------------
    # Combine the Matrices
    # --------------------
    # Choose a_fac so that expterm equals 0.5 where sdist == distthres.
    a_fac = np.sqrt(-distthres / (2 * np.log(0.5)))
    expterm = np.exp(-sdist / (2 * a_fac**2))

    if distmode == 0:  # Include distance and correlation
        distance_matrix = 1 - expterm * srho
    elif distmode == 1:  # Just include distance
        distance_matrix = 1 - expterm
    else:  # distmode == 2: just include correlation
        distance_matrix = 1 - srho

    # ---------------------
    # Do Clustering (scipy)
    # ---------------------
    # checks=False: convert to condensed form without validating symmetry
    # or a zero diagonal.
    cdist = squareform(distance_matrix, checks=False)
    linked = linkage(cdist, 'weighted')
    clusterout = fcluster(linked, nclusters, criterion='maxclust')

    # --------------------
    # Calculate Silhouette
    # --------------------
    if calcsil:
        s_score, s, s_bycluster = slutil.calc_silhouette(
            distance_matrix, clusterout, nclusters)

    # -------------------------
    # Calculate the uncertainty
    # -------------------------
    # Ratio of in-cluster to out-of-cluster covariance for every point.
    # The thresholds of Thompson and Merrifield (cap at 2, zero below 0.5)
    # are intentionally not applied here.
    uncertout = np.zeros(clusterout.shape)
    reducer = {0: np.mean, 1: np.median}.get(uncertmode)
    if reducer is not None:
        # Build each cluster's membership mask once instead of per point.
        members = {cid: clusterout == cid for cid in np.unique(clusterout)}
        for i, cid in enumerate(clusterout):
            inside = members[cid]
            covpt = scov[i, :]
            uncertout[i] = reducer(covpt[inside]) / reducer(covpt[~inside])

    # ------------------------------
    # Calculate Wk for gap statistic
    # ------------------------------
    Wk = np.zeros(nclusters)
    for i in range(nclusters):
        ids = np.where(clusterout == i + 1)[0]
        # Pairwise distances within the cluster; halve the sum because
        # every pair appears twice in the square sub-matrix.
        Wk[i] = distance_matrix[ids[:, None], ids[None, :]].sum() / 2

    # -----------------------
    # Replace into full array
    # -----------------------
    clustered = np.full(nlat * nlon, np.nan)
    clustered[okpts] = clusterout
    clustered = clustered.reshape(nlat, nlon)

    cluster_count = []
    for cid in range(1, nclusters + 1):
        cnt = (clustered == cid).sum()
        cluster_count.append(cnt)
        if printmsg:
            print("Found %i points in cluster %i" % (cnt, cid))

    uncert = np.full(nlat * nlon, np.nan)
    uncert[okpts] = uncertout
    uncert = uncert.reshape(nlat, nlon)

    if calcsil:  # Return silhouette values
        return clustered, uncert, cluster_count, Wk, s, s_bycluster
    if returnall:
        return clustered, uncert, cluster_count, Wk, srho, scov, sdist, distance_matrix
    return clustered, uncert, cluster_count, Wk
#%% Calculate covariance matrix

# Inputs and settings for this cell
sla, lon, lat = sla_lp, lon5, lat5
absmode, distmode, uncertmode = 0, 0, 0
distthres = 3000

# --------------------------------------------------------
# Correlation, covariance, and distance matrices (slutil)
# --------------------------------------------------------
ntime, nlat, nlon = sla.shape
(srho, scov, sdist,
 okdata, okpts, coords2) = slutil.calc_matrices(sla, lon, lat, return_all=True)

# ---------------------------------------------------
# absmode 2 flips the sign; absmode 1 uses magnitudes
# ---------------------------------------------------
if absmode == 2:
    scov *= -1
    srho *= -1
elif absmode == 1:
    scov, srho = np.abs(scov), np.abs(srho)

# --------------------------
# Combine the Matrices
# --------------------------
a_fac = np.sqrt(
Beispiel #3
0
# ---------------------------------------------------------
# Experiment labels (commented alternatives kept for reuse)
# ---------------------------------------------------------
snamelong = "Distance Only"
#snamelong = "Red Noise (Filtered)"
#snamelong = "Characteristic Distance = %i km "% chardist
expname = "AVISO_DistanceOnly_nclusters%i_Filtered_" % nclusters
#expname = "AVISO_WhiteNoise_nclusters%i_Filtered_" % (nclusters)
#expname = "AVISO_WhiteNoise_chardist%i_nclusters%i_Filtered_" % (chardist,nclusters)

# Mode switches and the distance at which the weight drops to 0.5
distmode, absmode, uncertmode = 0, 0, 0
distthres = 3000

# ---------------------------------------------------------
# Correlation, covariance, and distance matrices for varin
# ---------------------------------------------------------
ntime, nlat, nlon = varin.shape
(srho, scov, sdist,
 okdata, okpts, coords2) = slutil.calc_matrices(varin, lon5, lat5,
                                                return_all=True)
if absmode == 2:  # flip sign
    scov *= -1
    srho *= -1
elif absmode == 1:  # use magnitudes
    scov, srho = np.abs(scov), np.abs(srho)

# --------------------------------------------------------
# Default distance weighting: expterm is 0.5 at sdist == distthres
# --------------------------------------------------------
a_fac = np.sqrt(-distthres / (2 * np.log(0.5)))
expterm = np.exp(-sdist / (2 * a_fac**2))