def reconstruct_data(arr, neofs=16):
    """Yield EOF reconstructions of ``arr`` for one or more truncation levels.

    A single ``Eof`` solver is built from ``arr`` (with ``center=False``) and
    reused for every requested truncation, so the decomposition is not
    recomputed per level.

    Args:
        arr: Data handed unchanged to ``Eof``.
        neofs: Number of modes to retain. Either a single integer (Python
            ``int`` or any ``numbers.Integral``, e.g. a NumPy integer scalar)
            or an iterable of integers. Defaults to 16.

    Yields:
        ``(reconstructed, pcs, eofs)`` for each requested ``n``, i.e. the
        results of ``solver.reconstructedField(neofs=n)``,
        ``solver.pcs(npcs=n)`` and ``solver.eofs(neofs=n)``.
    """
    # Local import keeps the block self-contained; the file's import section
    # is not visible from here.
    import numbers

    # isinstance (rather than an exact type comparison) also accepts NumPy
    # integer scalars such as np.int64, which are not iterable.
    if isinstance(neofs, numbers.Integral):
        neofs = [neofs]

    solver = Eof(arr, center=False)
    for n in neofs:
        yield (
            solver.reconstructedField(neofs=n),
            solver.pcs(npcs=n),
            solver.eofs(neofs=n),
        )
### is to evaluate GEFS v12 reforecasts where the reforecast length is usually 16 days, which made it impossible
### to apply an n-day moving average or other time filtering methods. 4 EOFs may already filter some noisy signals.
### To be consistent with other researchers, one can add n-day moving-average code above.

# EOF analysis
solver = Eof(atemp_era5_pre, center=True)
pcs = solver.pcs()
mid_eig = solver.eigenvalues()
mid_eofs = solver.eofs()
eofs = solver.eofs()

### Print explained variance when using 4 EOFs
#var_explained_era5= np.cumsum(mid_eig)/np.sum(np.sum(mid_eig))
#print(var_explained_era5[3]) #0.5316330300316366

# Reconstruct the hgt500 field from the leading `noef` EOFs.
# NOTE(review): `noef` is defined outside this chunk; the original comment
# says "Using 4 leading EOFs", so presumably noef == 4 -- confirm.
reconstruction_era5 = solver.reconstructedField(noef)

### The Kmeans method needs a 2-D data format: number of days x horizontal fields
# One reshape replaces the per-day flatten loop: take the first ny * nt
# samples, copy them as float64 (matching the former np.zeros buffer) and
# collapse the remaining axes into a single nlat * nlon axis.
atemp_era5_post = np.array(
    reconstruction_era5[:ny * nt], dtype=float
).reshape(ny * nt, nlat * nlon)

### For other regimes, one probably should include red-noise test (e.g., Vigaud et al. 2018)
### Define 4 clusters (consistent with previous studies). Different random state won't change the results very much
km_standard = KMeans(n_clusters=ncluster, random_state=0)
km_standard = km_standard.fit(atemp_era5_post)

# One cluster label per sample
# (total number of days: ny x nday = 37 x 121 = 4477 for sample data)
label_era5 = km_standard.labels_

# Reshape label into 2-D array ny x nt
mid_label_yearday0 = np.reshape(label_era5, [ny, nt])
#mid_finalcluster = km_standard.cluster_centers_ # cluster centers