def reconstruct_data(arr, neofs=16):
    """Yield EOF reconstructions of ``arr`` for one or more truncation levels.

    This is a generator: nothing runs (not even the solver construction)
    until the caller starts iterating.

    Args:
        arr: Data handed unchanged to ``Eof(arr, center=False)``.
        neofs: Number of modes to keep. Either a single integer or an
            iterable of integers; one result is yielded per entry.

    Yields:
        Tuples ``(reconstructed, pcs, eofs)`` holding the return values of
        the solver's ``reconstructedField(neofs=n)``, ``pcs(npcs=n)`` and
        ``eofs(neofs=n)`` for each requested ``n``.
    """
    # Function-scope import so the block does not depend on file-level imports.
    import numbers

    # Wrap any integer scalar, not only exact ``int`` instances: int
    # subclasses and NumPy integer scalars would otherwise reach the loop
    # below and fail because they are not iterable.
    if isinstance(neofs, numbers.Integral):
        neofs = [neofs]

    # NOTE(review): center=False presumably means ``arr`` is already an
    # anomaly field -- confirm against the caller.
    solver = Eof(arr, center=False)
    for n in neofs:
        reconstructed = solver.reconstructedField(neofs=n)
        pcs = solver.pcs(npcs=n)
        eofs = solver.eofs(neofs=n)
        yield reconstructed, pcs, eofs
# Example no. 2
# 0
###   is to evaluate GEFS v12 reforecasts where the reforecast length is usually 16 days which made it impossible
###   to apply n-day moving average or other time filtering methods. 4 EOFs may already filter some noisy signals
### To be consistent with other researchers, one can add n-day moving-average code above.
# EOF analysis
# Fit the solver on atemp_era5_pre, then pull out the principal components,
# eigenvalues and EOF patterns.
solver = Eof(atemp_era5_pre, center=True)
pcs = solver.pcs()
mid_eig = solver.eigenvalues()
# NOTE(review): mid_eofs and eofs come from two identical calls; both names
# are kept in case later code uses either one.
mid_eofs = solver.eofs()
eofs = solver.eofs()

### Print explained variance when using 4 EOFs
#var_explained_era5= np.cumsum(mid_eig)/np.sum(np.sum(mid_eig))
#print(var_explained_era5[3])
#0.5316330300316366

# Reconstruct the field from the leading `noef` EOFs (original note: using 4
# leading EOFs to reconstruct the hgt500 field). `noef` must already be
# defined earlier in the script.
reconstruction_era5 = solver.reconstructedField(noef)

### The Kmeans method needs a 2-D data format: number of days x horizontal fields
# A single C-order reshape turns each of the first ny * nt fields into one
# row. np.array(..., dtype=float) makes an independent float64 copy, so the
# result never shares memory with reconstruction_era5.
atemp_era5_post = np.array(
    reconstruction_era5[:ny * nt], dtype=float
).reshape(ny * nt, nlat * nlon)

### For other regimes, one probably should include red-noise test (e.g., Vigaud et al. 2018)
### Define 4 clusters (consistent with previous studies). Different random state won't change the results very much
km_standard = KMeans(n_clusters=ncluster, random_state=0)
km_standard = km_standard.fit(atemp_era5_post)
label_era5 = km_standard.labels_  # (total number of days: ny x nday = 37 x 121 = 4477 for sample data)

# Reshape the flat label vector into a 2-D array of shape (ny, nt).
mid_label_yearday0 = np.reshape(label_era5, (ny, nt))
#mid_finalcluster = km_standard.cluster_centers_    # cluster centers