# Assumed imports for the functions in this section (in the original sources
# these live in separate modules); `example_da`, `load_single_variable`,
# `extract_region`, `calculate_anomaly`, and `_bootstrap_dim` are repo-local
# helpers defined elsewhere.
import sys
import warnings

import numpy as np
import numpy.testing as npt
import pandas as pd
import xarray as xr
import xeof
from xeof import eof, project_onto_eof  # assumed import path
from eofs.xarray import Eof


def test_project_onto_eof_values(shape, weight, wrap):
    """Test projection onto fields used to calculate eofs"""
    data = example_da(shape, wrap=wrap)
    field = example_da(shape, wrap=wrap)
    lat_dim = f"dim_{len(shape)-1}"
    xeof.core.LAT_NAME = lat_dim

    if weight == "none":
        weights = None
    elif weight == "sqrt_cos_lat":
        weights = np.cos(data[lat_dim] * np.pi / 180) ** 0.5

    sensor_dims = [f"dim_{i}" for i in range(1, len(shape))]
    eofs = eof(
        data,
        sensor_dims=sensor_dims,
        sample_dim="time",
        weight=weight,
        n_modes=np.inf,
        norm_PCs=False,
    )
    pc_res = project_onto_eof(
        field, eofs["eof"], sensor_dims=sensor_dims, weight=weight
    )

    Eof_solver = Eof(data, weights=weights, center=False)
    pc_ver = Eof_solver.projectField(field)
    # Our modes start at 1:
    pc_ver = pc_ver.assign_coords({"mode": pc_ver["mode"] + 1})

    npt.assert_allclose(pc_res - pc_ver, 0.0, atol=1e-10)
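
# A minimal usage sketch (not part of the test suite), assuming xeof's default
# latitude coordinate is named "lat": compute EOFs of a synthetic field and
# project the same field back onto them, which should reproduce the PCs.
# Argument names follow the test above; the data is illustrative.
def _example_project_onto_eof():
    da = xr.DataArray(
        np.random.rand(24, 18, 36),
        dims=["time", "lat", "lon"],
        coords={"lat": np.linspace(-85, 85, 18), "lon": np.arange(0, 360, 10)},
    )
    modes = eof(
        da,
        sensor_dims=["lat", "lon"],
        sample_dim="time",
        weight="sqrt_cos_lat",
        n_modes=4,
    )
    # Projecting the training field itself recovers the PC time series.
    pcs = project_onto_eof(
        da, modes["eof"], sensor_dims=["lat", "lon"], weight="sqrt_cos_lat"
    )
    return pcs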
def compute_npgo_index():
    """Computes the North Pacific Gyre Oscillation (NPGO) index from SST output.

    References:
        * Di Lorenzo, E. and N. Mantua, 2016: Multi-year persistence of the
          2014/15 North Pacific marine heatwave. Nature Climate Change, 6(11),
          1042-1047, doi:10.1038/nclimate3082.
    """
    sst = load_single_variable("SST")
    area = xr.open_dataarray("data/area.nc")
    # Extract the North Pacific region for Principal Component Analysis (PCA).
    sst = extract_region(sst, sst.TLONG, sst.TLAT, [180, 250, 25, 62])
    area = extract_region(area, area.TLONG, area.TLAT, [180, 250, 25, 62])
    sst_anom = calculate_anomaly(sst)
    month = sst_anom["time.month"]
    # Calculate the PCA over the winter (JFM) annual means.
    JFM = month <= 3
    # The EOF package requires `time` to be the leading dimension.
    sst_anom_winter = (
        sst_anom.where(JFM).resample(time="A").mean("time").transpose("time", ...)
    )
    # Compute the PCA (EOF) over the annual winter averages, weighted by the
    # grid cell area.
    solver = Eof(sst_anom_winter, weights=area, center=False)
    # Reconstruct the monthly index of SSTa by projecting the SSTa values onto
    # the annual principal component time series.
    pseudo_pc = solver.projectField(
        sst_anom.transpose("time", ...), neofs=2, eofscaling=1
    )
    # Mode 0 is the Pacific Decadal Oscillation. We're interested in its
    # orthogonal mode, which is the NPGO. The NPGO still explains ~20% of the
    # variance of the North Pacific.
    return pseudo_pc.isel(mode=1)
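
# Hypothetical sketch of the `extract_region` helper used above (the real
# implementation lives elsewhere in this repo): mask a curvilinear-grid field
# to a [lon_min, lon_max, lat_min, lat_max] box using its 2-D TLONG/TLAT
# coordinates, then drop the all-NaN rows and columns outside the box.
def extract_region(da, lon2d, lat2d, box):
    lon_min, lon_max, lat_min, lat_max = box
    mask = (
        (lon2d >= lon_min)
        & (lon2d <= lon_max)
        & (lat2d >= lat_min)
        & (lat2d <= lat_max)
    )
    return da.where(mask, drop=True)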
def compute_relative_entropy(
    initialized,
    control,
    anomaly_data=False,
    neofs=None,
    curv=True,
    nlead=None,
    nmember_control=10,
):
    """
    Compute relative entropy.

    Calculates EOFs from anomalies. Projects fields onto EOFs to receive
    pseudo-Principal Components per init and lead year. Calculates relative
    entropy based on _relative_entropy_formula.

    Args:
        initialized (xr.Dataset): anomaly ensemble data with dimensions
            lead, member, time and spatial [lon (x), lat (y)].
            DPLE or PM_ds.
        control (xr.Dataset): anomaly control distribution with
            non-spatial dimensions: spatial [lon (x), lat (y)].
            - LENS: member, time
            - PM_control: time
        anomaly_data (bool): Input data is already anomalies. Default: False.
        neofs (int): number of EOFs to use. Default: initialized.member.size.
        curv (bool): if curvilinear grids, disable EOF weights.
        nlead (int): number of timesteps calculated.
        nmember_control (int): number of members created from bootstrapping
            from control.

    Returns:
        rel_ent (xr.Dataset): relative entropy
    """
    # Defaults
    if neofs is None:
        neofs = initialized.member.size
    if nlead is None:
        nlead = initialized.lead.size

    # Case if you submit control with dims time and member (LENS case).
    if "member" in control.dims:
        control_uninitialized = _bootstrap_dim(
            control,
            initialized.lead.size,
            dim="init",
            dim_label=list(initialized.init.values),
        )
    # Case if you only submit control with dim time (PM case).
    else:
        control_uninitialized = xr.concat(
            [
                _bootstrap_dim(
                    control,
                    initialized.lead.size,
                    dim="member",
                    dim_label=np.arange(nmember_control),
                )
                for _ in range(initialized.init.size)
            ],
            dim="init",
        )
        control_uninitialized["init"] = initialized.init.values

    # initialized and control_uninitialized are allowed to have different
    # dims, since more members are needed to sample the control distribution
    # properly.
    if set(initialized.dims) != set(control_uninitialized.dims):
        warnings.warn(
            "Warning: initialized and control_uninitialized have different coords."
        )

    # Convert to xr.DataArray.
    if isinstance(control_uninitialized, xr.Dataset):
        control_uninitialized = control_uninitialized.to_array().squeeze()
    if isinstance(initialized, xr.Dataset):
        initialized = initialized.to_array().squeeze()

    # Detrend.
    non_spatial_dims = set(control_uninitialized.dims).intersection(
        ["init", "member"]
    )
    non_spatial_dims = list(non_spatial_dims)
    if not anomaly_data:  # if initialized, control are raw values
        anom_x = initialized - control_uninitialized.mean(non_spatial_dims)
        anom_b = control_uninitialized - control_uninitialized.mean(
            non_spatial_dims
        )
    else:  # leave as is when already anomalies
        anom_x = initialized
        anom_b = control_uninitialized

    # Prepare for EOF.
    if curv:  # if curvilinear lon(x,y), lat(x,y) data inputs
        wgts = None
    else:  # assumes there is 'lat' in coords
        coslat = np.cos(np.deg2rad(anom_x.coords["lat"].values))
        wgts = np.sqrt(coslat)[..., np.newaxis]

    # Eof requires an xr.DataArray.
    if isinstance(control, xr.Dataset):
        control = control.to_array().squeeze()

    if "member" in control.dims:  # LENS
        # Stack member and init into the time dim and make time first.
        non_spatial_control_dims = list(
            set(control.dims).intersection(["time", "member"])
        )
        transpose_dims = list(control.dims)
        transpose_dims.remove("member")
        transpose_dims.remove("time")
        dims = tuple(["time"] + transpose_dims)
        base_to_calc_eofs = (
            control.stack(new=tuple(non_spatial_control_dims))
            .rename({"new": "time"})
            .set_index({"time": "time"})
            .transpose(*dims)
        )
    else:  # PM_control
        base_to_calc_eofs = control

    solver = Eof(base_to_calc_eofs, weights=wgts)

    re_leadtime_list = []
    leads = initialized.lead.values[:nlead]
    inits = initialized.init.values
    # TODO: parallelize this double loop.
    for init in inits:  # loop over inits
        rl, sl, dl = ([] for _ in range(3))  # lists to store results in
        for lead in leads:  # loop over lead time
            # P_b base distribution; Eof.projectField requires a time dim.
            pc_b = solver.projectField(
                anom_b.sel(init=init, lead=lead)
                .drop("lead")
                .rename({"member": "time"}),
                neofs=neofs,
                eofscaling=0,
                weighted=False,
            ).rename({"time": "lead"})

            mu_b = pc_b.mean("lead")
            sigma_b = xr.DataArray(np.cov(pc_b.T))

            # P_x init distribution.
            pc_x = solver.projectField(
                anom_x.sel(init=init, lead=lead)
                .drop("lead")
                .rename({"member": "time"}),
                neofs=neofs,
                eofscaling=0,
                weighted=False,
            ).rename({"time": "lead"})

            mu_x = pc_x.mean("lead")
            sigma_x = xr.DataArray(np.cov(pc_x.T))

            r, d, s = _relative_entropy_formula(sigma_b, sigma_x, mu_x, mu_b, neofs)

            rl.append(r)
            sl.append(s)
            dl.append(d)

        re_leadtime_list.append(
            xr.Dataset({"R": ("lead", rl), "S": ("lead", sl), "D": ("lead", dl)})
        )

    re = xr.concat(re_leadtime_list, dim="init").assign(init=inits, lead=leads)
    return re
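
# Hypothetical sketch of `_relative_entropy_formula` (defined elsewhere in the
# package): the standard Kleeman (2002) decomposition of the relative entropy
# R between two multivariate Gaussians into a dispersion part D and a signal
# part S, with R = D + S. The signature and return order (r, d, s) follow the
# call site above; edge cases (e.g. neofs == 1) are not handled here.
def _relative_entropy_formula(sigma_b, sigma_x, mu_x, mu_b, neofs):
    sigma_b = np.asarray(sigma_b)
    sigma_x = np.asarray(sigma_x)
    dmu = np.asarray(mu_x - mu_b)
    sigma_b_inv = np.linalg.inv(sigma_b)
    # Dispersion: differences in spread between the two distributions.
    d = 0.5 * (
        np.log(np.linalg.det(sigma_b) / np.linalg.det(sigma_x))
        + np.trace(sigma_x @ sigma_b_inv)
        - neofs
    )
    # Signal: shift of the ensemble mean relative to the base distribution.
    s = 0.5 * (dmu @ sigma_b_inv @ dmu)
    r = d + s
    return r, d, s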
def main():
    ens = sys.argv[1]
    sYear = sys.argv[2]
    eYear = sys.argv[3]
    if int(sYear) < 1920:
        raise ValueError("Starting year must be 1920 or later.")
    if int(eYear) > 2100:
        raise ValueError("End year must be 2100 or earlier.")
    print("Computing NPGO for ensemble number " + ens + "...")
    filepath = ('/glade/scratch/rbrady/EBUS_BGC_Variability/' +
                'global_residuals/SST/remapped/remapped.SST.' + ens +
                '.192001-210012.nc')
    ds = xr.open_dataset(filepath)
    print("Global residuals loaded...")
    ds = ds['SST'].squeeze()
    # Make time dimension readable through xarray.
    ds['time'] = pd.date_range('1920-01', '2101-01', freq='M')
    # Reduce to time period of interest.
    ds = ds.sel(time=slice(sYear + '-01', eYear + '-12'))
    # Slice down to Northeast Pacific domain.
    ds = ds.sel(lat=slice(25, 62), lon=slice(180, 250))
    # Take annual JFM means.
    month = ds['time.month']
    JFM = (month <= 3)
    ds_winter = ds.where(JFM).resample(time='A').mean('time')
    # Compute EOF, weighted by the sqrt of the cosine of latitude.
    coslat = np.cos(np.deg2rad(ds_winter.lat.values))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(ds_winter, weights=wgts, center=False)
    eof = solver.eofsAsCorrelation(neofs=2)
    variance = solver.varianceFraction(neigs=2)
    # Reconstruct the monthly index of SSTa by projecting
    # these values onto the annual PC timeseries.
    pseudo_pc = solver.projectField(ds, neofs=2, eofscaling=1)
    print("NPGO computed.")
    # Set up as dataset.
    ds = eof.to_dataset()
    ds['pc'] = pseudo_pc
    ds['variance_fraction'] = variance
    ds = ds.rename({'eofs': 'eof'})
    ds = ds.sel(mode=1)
    # Flip the sign if needed so that the bullseye is negative.
    if ds.sel(lat=45.5, lon=210).eof >= 0:
        ds['eof'] = ds['eof'] * -1
        ds['pc'] = ds['pc'] * -1
    # Change some attributes for the variables.
    ds['eof'].attrs['long_name'] = 'Correlation between PC and JFM SSTa'
    ds['pc'].attrs['long_name'] = 'Principal component for NPGO'
    # Add a description of methods for clarity.
    ds.attrs['description'] = (
        'Second mode of JFM SSTa variability over 25-62N and 180-110W.')
    ds.attrs['anomalies'] = (
        'Anomalies were computed by removing the ensemble mean at each grid cell.')
    ds.attrs['weighting'] = (
        'The native grid was regridded to a standard 1deg x 1deg (180x360) grid. ' +
        'Weighting was computed via the sqrt of the cosine of latitude.')
    print("Saving to netCDF...")
    ds.to_netcdf('/glade/p/work/rbrady/NPGO/NPGO.' + ens + '.' +
                 str(sYear) + '-' + str(eYear) + '.nc')
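
# Assumed command-line entry point, so the script can be invoked as e.g.
# `python compute_npgo.py <ens> <sYear> <eYear>` (script name illustrative).
if __name__ == '__main__':
    main()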