Example #1
def test_project_onto_eof_values(shape, weight, wrap):
    """Test projection onto fields used to calculate eofs"""

    data = example_da(shape, wrap=wrap)
    field = example_da(shape, wrap=wrap)
    lat_dim = f"dim_{len(shape)-1}"
    xeof.core.LAT_NAME = lat_dim

    if weight == "none":
        weights = None
    elif weight == "sqrt_cos_lat":
        weights = np.cos(data[lat_dim] * np.pi / 180)**0.5

    sensor_dims = [f"dim_{i}" for i in range(1, len(shape))]
    eofs = eof(
        data,
        sensor_dims=sensor_dims,
        sample_dim="time",
        weight=weight,
        n_modes=np.inf,
        norm_PCs=False,
    )

    pc_res = project_onto_eof(field,
                              eofs["eof"],
                              sensor_dims=sensor_dims,
                              weight=weight)

    Eof_solver = Eof(data, weights=weights, center=False)
    pc_ver = Eof_solver.projectField(field)
    # Our modes start at 1:
    pc_ver = pc_ver.assign_coords({"mode": pc_ver["mode"] + 1})

    npt.assert_allclose(pc_res - pc_ver, 0.0, atol=1e-10)
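A plausible way to drive this test (the parameter values below are assumptions; the actual pytest.mark.parametrize decorators and the example_da helper live elsewhere in the xeof test suite):

import pytest

@pytest.mark.parametrize("shape", [(10, 4), (10, 4, 3)])      # hypothetical shapes
@pytest.mark.parametrize("weight", ["none", "sqrt_cos_lat"])
@pytest.mark.parametrize("wrap", [None, "dim_1"])             # hypothetical wrap values
def test_project_onto_eof_values(shape, weight, wrap):
    ...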
Example #2
def compute_npgo_index():
    """Computes the North Pacific Gyre Oscillation (NPGO) index from SST output.

    References:
        * Di Lorenzo, E. and N. Mantua, 2016: Multi-year persistence of the
          2014/15 North Pacific marine heatwave. Nature Climate Change, 6(11),
          1042+, doi:10.1038/nclimate3082.
    """
    sst = load_single_variable("SST")
    area = xr.open_dataarray("data/area.nc")
    # Extract North Pacific region for Principal Component Analysis (PCA).
    sst = extract_region(sst, sst.TLONG, sst.TLAT, [180, 250, 25, 62])
    area = extract_region(area, area.TLONG, area.TLAT, [180, 250, 25, 62])
    sst_anom = calculate_anomaly(sst)
    month = sst_anom["time.month"]
    # Calculate PCA over the winter (JFM) annual means.
    JFM = month <= 3
    # EOF package requires `time` to be the leading dimension.
    sst_anom_winter = (
        sst_anom.where(JFM).resample(time="A").mean("time").transpose("time", ...)
    )
    # Compute PCA (EOF) over the annual winter averages, weighted by the grid cell area.
    solver = Eof(sst_anom_winter, weights=area, center=False)
    # Reconstruct the monthly index of SSTa by projecting the SSTa values onto the
    # annual principal component time series.
    pseudo_pc = solver.projectField(
        sst_anom.transpose("time", ...), neofs=2, eofscaling=1
    )
    # Mode 0 is the Pacific Decadal Oscillation. We're interested in its orthogonal
    # mode, which is the NPGO. The NPGO still explains ~20% of the variance of the
    # North Pacific.
    return pseudo_pc.isel(mode=1)
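A minimal usage sketch, assuming the SST output and data/area.nc referenced above are on disk and the helpers (load_single_variable, extract_region, calculate_anomaly) are importable:

npgo = compute_npgo_index()
npgo.to_netcdf('npgo_index.nc')  # hypothetical output path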
Example #3
def compute_relative_entropy(
    initialized,
    control,
    anomaly_data=False,
    neofs=None,
    curv=True,
    nlead=None,
    nmember_control=10,
):
    """
    Compute relative entropy.

    Calculates EOFs from anomalies. Projects fields on EOFs to receive
    pseudo-Principle Components per init and lead year. Calculate
    relative entropy based on _relative_entropy_formula.

    Args:
        initialized (xr.Dataset): anomaly ensemble data with dimensions
                                    lead, member, time and
                                    spatial [lon (x), lat(y)].
                                    DPLE or PM_ds
        control (xr.Dataset): anomaly control distribution with
                                              non-spatial dimensions:
                                              spatial [lon (x), lat(y)].
                                              - LENS: member, time
                                              - PM_control: time
        anomaly_data (bool): Input data is anomaly alread. Default: False.
        neofs (int): number of EOFs to use.
                     Default: initialized.member.size.
        curv (bool): if curvilinear grids disables EOF weights.
        nlead (int): number of timesteps calculated.
        nmember_control (int): number of members created from
                               bootstrapping from control

    Returns:
        rel_ent (xr.Dataset): relative entropy
    """
    # Defaults
    if neofs is None:
        neofs = initialized.member.size
    if nlead is None:
        nlead = initialized.lead.size

    # Case: control has both 'time' and 'member' dims (LENS)
    if "member" in control.dims:
        control_uninitialized = _bootstrap_dim(
            control,
            initialized.lead.size,
            dim="init",
            dim_label=list(initialized.init.values),
        )

    # Case: control has only a 'time' dim (PM)
    else:
        control_uninitialized = xr.concat(
            [
                _bootstrap_dim(
                    control,
                    initialized.lead.size,
                    dim="member",
                    dim_label=np.arange(nmember_control),
                ) for _ in range(initialized.init.size)
            ],
            dim="init",
        )
        control_uninitialized["init"] = initialized.init.values

    # initialized and control_uninitialized are allowed to have different
    # dims, since more members are needed to sample the control
    # distribution properly
    if set(initialized.dims) != set(control_uninitialized.dims):
        warnings.warn(
            "initialized and control_uninitialized have different dims."
        )

    # convert to xr.DataArray
    if isinstance(control_uninitialized, xr.Dataset):
        control_uninitialized = control_uninitialized.to_array().squeeze()
    if isinstance(initialized, xr.Dataset):
        initialized = initialized.to_array().squeeze()

    # form anomalies by removing the control mean over non-spatial dims
    non_spatial_dims = set(control_uninitialized.dims).intersection(
        ["init", "member"])
    non_spatial_dims = list(non_spatial_dims)
    if not anomaly_data:  # if ds, control are raw values
        anom_x = initialized - control_uninitialized.mean(non_spatial_dims)
        anom_b = control_uninitialized - control_uninitialized.mean(
            non_spatial_dims)
    else:  # leave as is when already anomalies
        anom_x = initialized
        anom_b = control_uninitialized

    # prepare for EOF
    if curv:  # if curvilinear lon(x,y), lat(x,y) data inputs
        wgts = None
    else:  # assumes there is 'lat' in coords
        coslat = np.cos(np.deg2rad(anom_x.coords["lat"].values))
        wgts = np.sqrt(coslat)[..., np.newaxis]

    # Eof requires an xr.DataArray
    if isinstance(control, xr.Dataset):
        control = control.to_array().squeeze()

    if "member" in control.dims:  # LENS
        # stack member and init into time dim, make time first
        non_spatial_control_dims = list(
            set(control.dims).intersection(["time", "member"]))

        transpose_dims = list(control.dims)
        transpose_dims.remove("member")
        transpose_dims.remove("time")
        dims = tuple(["time"] + transpose_dims)

        base_to_calc_eofs = (
            control.stack(new=tuple(non_spatial_control_dims))
            .rename({"new": "time"})
            .set_index({"time": "time"})
            .transpose(*dims)
        )
    else:
        # PM_control
        base_to_calc_eofs = control

    solver = Eof(base_to_calc_eofs, weights=wgts)

    re_leadtime_list = []
    leads = initialized.lead.values[:nlead]
    inits = initialized.init.values
    # TODO: parallelize this double loop
    for init in inits:  # loop over inits
        rl, sl, dl = ([] for _ in range(3))  # lists to store results in
        for lead in leads:  # loop over lead time
            # P_b: base distribution (projectField requires a 'time' dim)
            pc_b = solver.projectField(
                anom_b.sel(init=init,
                           lead=lead).drop("lead").rename({"member": "time"}),
                neofs=neofs,
                eofscaling=0,
                weighted=False,
            ).rename({"time": "lead"})

            mu_b = pc_b.mean("lead")
            sigma_b = xr.DataArray(np.cov(pc_b.T))

            # P_x: initialized ensemble distribution
            pc_x = solver.projectField(
                anom_x.sel(init=init,
                           lead=lead).drop("lead").rename({"member": "time"}),
                neofs=neofs,
                eofscaling=0,
                weighted=False,
            ).rename({"time": "lead"})

            mu_x = pc_x.mean("lead")
            sigma_x = xr.DataArray(np.cov(pc_x.T))

            r, d, s = _relative_entropy_formula(sigma_b, sigma_x, mu_x, mu_b,
                                                neofs)

            rl.append(r)
            sl.append(s)
            dl.append(d)

        re_leadtime_list.append(
            xr.Dataset({
                "R": ("lead", rl),
                "S": ("lead", sl),
                "D": ("lead", dl)
            }))

    re = xr.concat(re_leadtime_list, dim="init").assign(init=inits, lead=leads)

    return re
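The helper _relative_entropy_formula is not shown above. A minimal sketch of what it plausibly computes, following the Kleeman (2002) decomposition of relative entropy R into a dispersion part D and a signal part S; the decomposition and the implementation below are assumptions, not the verified source:

import numpy as np

def _relative_entropy_formula(sigma_b, sigma_x, mu_x, mu_b, neofs):
    """Assumed Kleeman (2002)-style relative entropy; R = S + D."""
    sigma_b = np.asarray(sigma_b)
    sigma_x = np.asarray(sigma_x)
    diff = np.asarray(mu_x - mu_b)
    sigma_b_inv = np.linalg.inv(sigma_b)
    # Dispersion: change of the ensemble covariance relative to the base.
    D = 0.5 * (np.log(np.linalg.det(sigma_b) / np.linalg.det(sigma_x))
               + np.trace(sigma_x @ sigma_b_inv) - neofs)
    # Signal: displacement of the ensemble mean, weighted by base covariance.
    S = 0.5 * diff @ sigma_b_inv @ diff
    R = S + D  # total relative entropy
    return R, D, S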
Example #4
def main():
    ens = sys.argv[1]
    sYear = sys.argv[2]
    eYear = sys.argv[3]
    if int(sYear) < 1920:
        raise ValueError("Starting year must be 1920 or later.")
    if int(eYear) > 2100:
        raise ValueError("End year must be 2100 or earlier.")
    print("Computing NPGO for ensemble number " + ens + "...")
    filepath = ('/glade/scratch/rbrady/EBUS_BGC_Variability/' +
                'global_residuals/SST/remapped/remapped.SST.' + ens +
                '.192001-210012.nc')
    ds = xr.open_dataset(filepath)
    ds = ds['SST'].squeeze()
    print("Global residuals loaded...")
    # Make time dimension readable through xarray.
    ds['time'] = pd.date_range('1920-01', '2101-01', freq='M')
    # Reduce to time period of interest.
    ds = ds.sel(time=slice(sYear + '-01', eYear + '-12'))
    # Slice down to Northeast Pacific domain.
    ds = ds.sel(lat=slice(25, 62), lon=slice(180, 250))
    # Take annual JFM means.
    month = ds['time.month']
    JFM = (month <= 3)
    ds_winter = ds.where(JFM).resample(time='A').mean('time')
    # Compute EOF
    coslat = np.cos(np.deg2rad(ds_winter.lat.values))
    wgts = np.sqrt(coslat)[..., np.newaxis]
    solver = Eof(ds_winter, weights=wgts, center=False)
    eof = solver.eofsAsCorrelation(neofs=2)
    variance = solver.varianceFraction(neigs=2)
    # Reconstruct the monthly index of SSTa by projecting
    # the monthly SSTa values onto the annual PC time series.
    pseudo_pc = solver.projectField(ds, neofs=2, eofscaling=1)
    print("NPGO computed.")
    # Set up as dataset.
    ds = eof.to_dataset()
    ds['pc'] = pseudo_pc
    ds['variance_fraction'] = variance
    ds = ds.rename({'eofs': 'eof'})
    ds = ds.sel(mode=1)
    # Invert the sign convention if needed so the bullseye is negative.
    if ds.sel(lat=45.5, lon=210).eof >= 0:
        ds['eof'] = ds['eof'] * -1
        ds['pc'] = ds['pc'] * -1
    # Change some attributes for the variables.
    ds['eof'].attrs['long_name'] = 'Correlation between PC and JFM SSTa'
    ds['pc'].attrs['long_name'] = 'Principal component for NPGO'
    # Add a description of methods for clarity.
    ds.attrs['description'] = ('Second mode of JFM SSTa variability over '
                               '25-62N and 180-110W.')
    ds.attrs['anomalies'] = ('Anomalies were computed by removing the '
                             'ensemble mean at each grid cell.')
    ds.attrs['weighting'] = (
        'The native grid was regridded to a standard 1deg x 1deg (180x360) '
        'grid. Weighting was computed via the sqrt of the cosine of latitude.')
    print("Saving to netCDF...")
    ds.to_netcdf('/glade/p/work/rbrady/NPGO/NPGO.' + ens + '.' + str(sYear) +
                 '-' + str(eYear) + '.nc')
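Run as a script, main() expects the ensemble member and year range as command-line arguments (the script name below is hypothetical; the excerpt also assumes sys, numpy as np, pandas as pd, xarray as xr, and eofs.xarray.Eof are imported):

if __name__ == '__main__':
    # Hypothetical invocation: python compute_npgo.py 001 1920 2015
    main()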