Example #1
    def correlation(self, v, dim, region=None):
        """Compute the correlation is the specified dimension.

        """
        self, v = align_latlon(self, v)
        dims = []
        if dim == 'time': dims = ['time']
        if dim == 'space': dims = [self.lat_name, self.lon_name]
        sds = self.ds if region is None else ilamb_regions.maskedDataset(
            region, self)
        vds = v.ds if region is None else ilamb_regions.maskedDataset(
            region, v)
        r = xr.corr(sds[self.varname], vds[v.varname], dim=dims)
        dims = ["'%s'" % d for d in dims]
        attrs = {}
        attrs['ilamb'] = "correlation(%s,%s,dim=[%s]); " % (
            self.varname, v.varname, ",".join(dims))
        r.attrs = attrs
        tm = self.ds['time_measure'] if ('time_measure' in self.ds
                                         and 'time' in r.dims) else None
        cm = self.ds['cell_measure'] if (
            'cell_measure' in self.ds and
            (self.lat_name in r.dims and self.lon_name in r.dims)) else None
        r = Variable(da=r,
                     varname="corr_%s_%s" % (self.varname, v.varname),
                     cell_measure=cm,
                     time_measure=tm)
        r.setAttr("units", "1")
        if r.ds[r.varname].size == 1: r = float(r.ds[r.varname])
        return r
Example #2
def annual_cycle_correlation(sim, ref, window: int = 15):
    """Annual cycle correlation.

    Pearson correlation coefficient between the smooth day-of-year averaged annual cycles of the simulation and
    the reference. In the smooth day-of-year averaged annual cycles, each day-of-year is averaged over all years
    and over a window of days around that day.

    Parameters
    ----------
    sim : xr.DataArray
      data from the simulation (a time-series for each grid-point)
    ref : xr.DataArray
      data from the reference (observations) (a time-series for each grid-point)
    window : int
      Size of the window of days around each day of year over which to take the mean.
      E.g., if window=31, Jan 1st is averaged over December 17th to January 16th.

    Returns
    -------
    xr.DataArray
      Annual cycle correlation between the simulation and the reference

    """
    # group by day-of-year and window around each doy
    grouper_test = sdba.base.Grouper("time.dayofyear", window=window)
    # for each day, mean over X day window and over all years to create a smooth avg annual cycle
    sim_annual_cycle = grouper_test.apply("mean", sim)
    ref_annual_cycle = grouper_test.apply("mean", ref)
    out = xr.corr(ref_annual_cycle, sim_annual_cycle, dim="dayofyear")
    out.attrs.update(sim.attrs)
    out.attrs["long_name"] = "Correlation of the annual cycle"
    return out
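# A minimal usage sketch for annual_cycle_correlation, assuming `sdba` above is
# xclim's sdba module; the synthetic data and names below are illustrative.
import numpy as np
import pandas as pd
import xarray as xr
from xclim import sdba  # assumed import behind the snippet above

time = pd.date_range("2000-01-01", periods=3 * 365, freq="D")
ref = xr.DataArray(np.sin(2 * np.pi * time.dayofyear / 365.0),
                   coords={"time": time}, dims="time")  # idealized annual cycle
sim = ref + 0.1 * xr.DataArray(np.random.randn(time.size),
                               coords={"time": time}, dims="time")  # noisy copy

r = annual_cycle_correlation(sim, ref, window=15)
print(float(r))  # close to 1 when the simulated cycle tracks the reference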
Example #3
def get_cor_rmsd_std(mme, obs):
    std = mme.std(dim='time')
    corre = xr.corr(obs, mme, dim='time')
    rmsd = xr.apply_ufunc(crmsd_func,
                          obs,
                          mme,
                          input_core_dims=[['time'], ['time']],
                          vectorize=True)
    return corre, rmsd, std
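# get_cor_rmsd_std assumes a `crmsd_func` helper that is not shown above. A
# hedged sketch of a centered RMSD that fits the apply_ufunc signature used
# there (1-D NumPy arrays in, scalar out):
import numpy as np

def crmsd_func(obs, mod):
    # Remove each series' mean so that only pattern differences, not mean bias,
    # contribute to the RMSD.
    obs_anom = obs - np.nanmean(obs)
    mod_anom = mod - np.nanmean(mod)
    return np.sqrt(np.nanmean((mod_anom - obs_anom) ** 2))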
Example #4
def test_corr(da_a, da_b, dim):
    if dim is not None:

        def np_corr_ind(ts1, ts2, a, x):
            # Ensure the ts are aligned and missing values ignored
            ts1, ts2 = broadcast(ts1, ts2)
            valid_values = ts1.notnull() & ts2.notnull()

            ts1 = ts1.where(valid_values)
            ts2 = ts2.where(valid_values)

            return np.ma.corrcoef(
                np.ma.masked_invalid(ts1.sel(a=a, x=x).data.flatten()),
                np.ma.masked_invalid(ts2.sel(a=a, x=x).data.flatten()),
            )[0, 1]

        expected = np.zeros((3, 4))
        for a in [0, 1, 2]:
            for x in [0, 1, 2, 3]:
                expected[a, x] = np_corr_ind(da_a, da_b, a=a, x=x)
        actual = xr.corr(da_a, da_b, dim)
        assert_allclose(actual, expected)

    else:

        def np_corr(ts1, ts2):
            # Ensure the ts are aligned and missing values ignored
            ts1, ts2 = broadcast(ts1, ts2)
            valid_values = ts1.notnull() & ts2.notnull()

            ts1 = ts1.where(valid_values)
            ts2 = ts2.where(valid_values)

            return np.ma.corrcoef(
                np.ma.masked_invalid(ts1.data.flatten()),
                np.ma.masked_invalid(ts2.data.flatten()),
            )[0, 1]

        expected = np_corr(da_a, da_b)
        actual = xr.corr(da_a, da_b, dim)
        assert_allclose(actual, expected)
Example #5
def test_covcorr_consistency(da_a, da_b, dim):
    # Testing that xr.corr and xr.cov are consistent with each other
    # 1. Broadcast the two arrays
    da_a, da_b = broadcast(da_a, da_b)
    # 2. Ignore the nans
    valid_values = da_a.notnull() & da_b.notnull()
    da_a = da_a.where(valid_values)
    da_b = da_b.where(valid_values)

    expected = xr.cov(da_a, da_b, dim=dim,
                      ddof=0) / (da_a.std(dim=dim) * da_b.std(dim=dim))
    actual = xr.corr(da_a, da_b, dim=dim)
    assert_allclose(actual, expected)
Example #6
def calcSkill(model, verif, var_list):
    """
    Calculates correlation as a function of lead time
    for a given list of variables using verif and model.
    """

    # Match verif and hindcast ics
    verif_match=verif.sel(ic=model['ic'])

    # Calculate correlation for specified regions
    corr=[]
    for v in var_list:
        corr.append((xr.corr(model[v],verif_match[v],
                     dim='ic')).to_dataset(name=v))
    corr_regions=xr.merge(corr)

    return corr_regions
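# A sketch of inputs that satisfy calcSkill, assuming 'ic' indexes initial
# conditions and both datasets share a lead-time dimension; all names and
# values below are illustrative.
import numpy as np
import xarray as xr

ic = np.arange(30)
lead = np.arange(10)
truth = np.random.rand(ic.size, lead.size)
verif = xr.Dataset({"t2m": (("ic", "lead"), truth)},
                   coords={"ic": ic, "lead": lead})
model = xr.Dataset({"t2m": (("ic", "lead"),
                            truth + 0.1 * np.random.rand(ic.size, lead.size))},
                   coords={"ic": ic, "lead": lead})

skill = calcSkill(model, verif, ["t2m"])  # correlation over 'ic' at each lead
print(skill["t2m"].values)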
Example #7
def boot_corr_ci(a1, a2, conf, nboots=1000):
    """ Output the conf% confidence interval on the correlation between
    two 1-dimensional arrays by bootstrapping with replacement

    Input:
        a1 = first array
        a2 = second array
        conf = the confidence interval you want e.g., 95 for 95% ci
        nboots = the number of bootstrap samples used to generate the ci

    Output:
        minci = the minimum range of the confidence interval
        maxci = the maximum range of the confidence interval

    This assumes a two sided test.
    """

    ptilemin = (100. - conf) / 2.
    ptilemax = conf + (100 - conf) / 2.

    if a1.size != a2.size:
        raise ValueError("The two arrays must have the same size")

    samplesize = a1.size
    ranu = np.random.uniform(0, samplesize, nboots * samplesize)
    ranu = np.floor(ranu).astype(int)

    bootdat1 = np.array(a1[ranu]).reshape([samplesize, nboots])
    bootdat2 = np.array(a2[ranu]).reshape([samplesize, nboots])

    bootcor = xr.corr(xr.DataArray(bootdat1),
                      xr.DataArray(bootdat2),
                      dim='dim_0')
    minci = np.percentile(bootcor, ptilemin)
    maxci = np.percentile(bootcor, ptilemax)

    return minci, maxci
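# A usage sketch for boot_corr_ci with synthetic correlated series; the
# variable names and sizes are illustrative.
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal(500)
y = 0.6 * x + 0.8 * rng.standard_normal(500)  # built so that r is about 0.6

ci_lo, ci_hi = boot_corr_ci(x, y, conf=95)
print("95% CI on r: [{:.2f}, {:.2f}]".format(ci_lo, ci_hi))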
Example #8
def boot_corr_multimem(darrays, nboots, nmems, seed=None):
    """ Generate nboots bootstrap samples, with nmems members in each sample, from two
        DataArrays with the same coordinates, sampling along the first dimension.
        Calculate the correlation coefficient of the two variables across the nmems
        members for each sample.

    Input: darrays = list of xarray data arrays with the sampling being performed on the first dimension
           nboots = the number of bootstrap samples
           nmems = the number of members in each bootstrap sample

    Output: bootdat = an xarray data array containing the bootstrap correlation
            coefficients, with dimension iboot of length nboots

    Option: a seed if you want to specify the seed for the random number generator
    """

    dims = darrays[0].dims
    bootcoords = [("iboot", np.arange(0, nboots, 1))]

    # generate random numbers for bootstrapping
    if seed is not None:
        np.random.seed(seed)

    nmemin = darrays[0][dims[0]].size
    ranu = np.random.uniform(0, nmemin, nboots * nmems)
    ranu = np.floor(ranu).astype(int)

    ## select ensemble members and reshape into long time series for each bootstrap sample
    bootdat_var1 = np.array(darrays[0][ranu]).reshape(nboots, -1)
    bootdat_var2 = np.array(darrays[1][ranu]).reshape(nboots, -1)

    ## calculate correlation for each bootstrap
    bootdat_corr = xr.corr(xr.DataArray(bootdat_var1),
                           xr.DataArray(bootdat_var2),
                           dim="dim_1")

    bootdat = xr.DataArray(bootdat_corr, coords=bootcoords)

    return bootdat
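# A usage sketch for boot_corr_multimem with two synthetic 40-member
# ensembles; names, sizes, and the seed are illustrative.
import numpy as np
import xarray as xr

rng = np.random.default_rng(1)
member = np.arange(40)
x = xr.DataArray(rng.standard_normal(member.size), dims="member",
                 coords={"member": member})
y = 0.5 * x + xr.DataArray(rng.standard_normal(member.size), dims="member",
                           coords={"member": member})

boots = boot_corr_multimem([x, y], nboots=500, nmems=20, seed=7)
print(boots.quantile([0.025, 0.975]).values)  # bootstrap 95% range of r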
Example #9
        prediction_val.values,
        linestyle='--',
        color='red')
ax1.plot(
    da_x_original.time.values,
    da_x_original.sel(variable='genesis_potential_index__gpi__for_tc').values,
    color='green',
    linestyle='--')
ax.legend(['Obs.', 'MLP_train', 'MLP_validation'])
ax1.legend(['GPI'])
ax.set_ylabel('Number of storms')
ax1.set_ylabel('GPI')
plt.title('Storms: Neural net. vs GPI')
plt.show()

xr.corr(prediction_val, da_y, dim='time')
xr.corr(da_x_original.sel(variable='genesis_potential_index__gpi__for_tc'),
        da_y,
        dim='time')
error_MLP_train = prediction_train - da_y

#  Interpreting
import numpy as np
da_x_test = da_x_scaled.copy(data=np.zeros(da_x_scaled.shape) + .5)
da_x_test.values[:, -2] = np.linspace(0, 1, da_x_test.shape[0])

prediction_test = model.predict(da_x_test.values)
da_x_test = da_x_test.copy(data=scaler_x.inverse_transform(da_x_test.values))
plt.scatter(x=da_x_test.values[:, -2],
            y=scaler_y.inverse_transform(prediction_test))
plt.show()
Example #10
da_rain = da_rain.sel(latitude=slice(15, -40))
da = da.astype('float32')
da_rain = da_rain.astype('float32')
da_rain = da_rain.sortby('longitude')
da_rain = da_rain.sortby('latitude')
da_rain = da_rain.sel(latitude=da.latitude,
                      longitude=da.longitude,
                      method='nearest')
da_rain = da_rain.assign_coords(latitude=da.latitude)
da_rain = da_rain.assign_coords(longitude=da.longitude)
da_rain = da_rain.sel(time=da.time)

da_rain = da_rain.resample(time='2D').sum()
da = da.resample(time='2D').sum()

season_mask = [(pd.Timestamp(x).month % 12 + 3) // 3 for x in da.time.values]
season_mask_rain = [(pd.Timestamp(x).month % 12 + 3) // 3
                    for x in da_rain.time.values]
da['season_mask'] = ('time', season_mask)
da_rain['season_mask'] = ('time', season_mask_rain)
da = da.where(da.season_mask == season, drop=True)
da_rain = da_rain.where(da_rain.season_mask == season, drop=True)

da = da.load()
da = np.log(da)
da_rain = da_rain.load()
corr = xr.corr(da, da_rain, dim='time')
# da.to_netcdf(FTLEpath + f'Downsampled_ftle_{season}.nc')
# da_rain.to_netcdf(FTLEpath + f'Downsampled_{variable_dict[variable]}_{season}.nc')
corr.to_netcdf(FTLEpath + f'corr_{variable_dict[variable]}_{season}.nc')
Example #11
gpcp_precip = xr.open_dataset(
    '/disco/share/mp586/CSSP-postdoc/data/gpcp_daily_merge_noleap_JRA_55_grid_pentadsnoclim.nc'
)

## full precip fields
r1 = np.empty(len(model_list))
for i in range(len(model_list)):
    model = model_list[i]
    precip = xr.open_dataset('/disco/share/mp586/CSSP-postdoc/data/' + model +
                             '_pr_JRA_55_grid_pentadclim.nc')
    gpcp_p = xr.DataArray(
        np.asarray(gpcp_precip.precip),
        coords=[precip.time, gpcp_precip.lat, gpcp_precip.lon],
        dims=['time', 'lat', 'lon'])
    r1[i] = xr.corr(gpcp_p, precip.pr)
    print(model + ' = ' + str(r1[i]))

## precip fields in selected areas
r2 = np.empty(len(model_list))
for i in range(len(model_list)):
    model = model_list[i]
    precip = xr.open_dataset('/disco/share/mp586/CSSP-postdoc/data/' + model +
                             '_pr_JRA_55_grid_pentadclim.nc')
    gpcp_p = xr.DataArray(
        np.asarray(gpcp_precip.precip),
        coords=[precip.time, gpcp_precip.lat, gpcp_precip.lon],
        dims=['time', 'lat', 'lon'])
    r2[i] = xr.corr(
        gpcp_p.where(landmask_time == 0.).sel(lon=slice(90., 150.),
                                              lat=slice(-40., 40.)),
        precip.pr.where(landmask_time == 0.).sel(lon=slice(90., 150.),
                                                 lat=slice(-40., 40.)))
    print(model + ' = ' + str(r2[i]))
Example #12
def test_corr_only_dataarray():
    with pytest.raises(TypeError, match="Only xr.DataArray is supported"):
        xr.corr(xr.Dataset(), xr.Dataset())
Example #13
def pearson_correlation(data1, data2):
    # input data must be xarray.DataArray objects (xr.corr does not accept Datasets)
    pc = xr.corr(data1, data2).values.item()
    return pc
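# A quick usage sketch for pearson_correlation; the inputs are illustrative.
import numpy as np
import xarray as xr

a = xr.DataArray(np.random.rand(20), dims="time")
b = xr.DataArray(np.random.rand(20), dims="time")
print(pearson_correlation(a, b))  # one float, reduced over all shared dims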
Example #14
# selecting winter season for index data
index_winter = index_crop.sel(time=index_crop['time.season'] == 'DJF')
# Compute the Pearson correlation coefficient between
# two DataArray objects along a shared dimension
# correlacao = {'Hs': xr.corr(dwave.swh.round(3),
#                             index_crop.round(3), dim='time'),
#               'Tp': xr.corr(dwave.mwp.round(3),
#                             index_crop.round(3), dim='time'),
#               'Wave Direction': xr.corr(dwave.mwd.round(3),
#                                         index_crop.round(3), dim='time')}

correlacao = xr.merge([
    xr.corr(dwave.swh.round(3), index_crop.round(3),
            dim='time').to_dataset(name='Hs'),
    xr.corr(dwave.mwp.round(3), index_crop.round(3),
            dim='time').to_dataset(name='Tp'),
    xr.corr(dwave.mwd.round(3), index_crop.round(3),
            dim='time').to_dataset(name='Mwd')
])

# saving correlation matrix as file
correlacao.to_netcdf(path=(finalpath + 'map_corr2_' + tele + '.nc'))

# plotting figures

# limites = [-104, 40, 20, 85]
# #clevs = np.linspace(-1, 1, 10)

# proj = ccrs.LambertConformal(central_longitude=-40,
Example #15
datetime_indices = {
    'YEAR': ppcs.index.year,
    'MONTH': ppcs.index.month,
    'DAY': np.ones(len(ppcs), dtype=int)
}
ppcs.index = pd.to_datetime(datetime_indices)
ppcs.index.name = 'time'

# excluding the first 10 years to get the right number of climatologies
ppcs = ppcs[slice(hs.isel(time=0).time.values,
                  hs.isel(time=-1).time.values)].to_xarray()
ppcs = ppcs.expand_dims({'latitude': hs.latitude, 'longitude': hs.longitude})

hs_ppcs = xr.merge([hs, ppcs])

correlacao = xr.corr(hs_ppcs.hs, hs_ppcs.pseudo_pcs,
                     dim='time').to_dataset(name='Hs')

plot_correlacao(correlacao, 'Hs', 'AO', 0.19)

# running some tests
onda_media_climatologica = hs_ppcs.hs.mean(dim='time')
onda_media_indice_negativo = hs_ppcs.where(hs_ppcs.pseudo_pcs < 0,
                                           drop=True).hs.mean(dim='time')
anomalia_indice_negativo = (onda_media_indice_negativo -
                            onda_media_climatologica).to_dataset(name='Hs')
plot_correlacao(anomalia_indice_negativo,
                'Hs',
                'AO',
                0.5,
                clevs=np.linspace(-0.3, 0.3, 21))
onda_media_indice_extremo_negativo = hs_ppcs.where(
Example #16
# significant['Hs'].plot.contourf(colors='none',
#                                 hatches = ['///'],
#                                 transform=ccrs.PlateCarree(),
#                                 add_colorbar=False)

# if tempo == 'presente':
#     plt.title('Correlation ' + modelo + ' Hs/AO index - 1980 : 2009')
# elif tempo == 'futuro':
#     plt.title('Correlation ' + modelo + ' Hs/AO index - 2070 : 2100')

# now selecting only the winter period from the two datasets
hs_deseason_ppcs_winter = hs_deseason_ppcs.sel(
    time=hs_deseason_ppcs['time.season'] == 'DJF')

corr_deseason_winter = xr.corr(hs_deseason_ppcs_winter[parametro],
                               hs_deseason_ppcs_winter.indice,
                               dim='time').to_dataset(name=parametro)

siglev = n30_80
fig, ax = faz_mapa_lambert()
cd = corr_deseason_winter[parametro].plot(levels=clevs,
                                          cmap=colormap,
                                          transform=ccrs.PlateCarree(),
                                          add_colorbar=False)
fig.colorbar(cd, orientation='horizontal', pad=0.03, shrink=0.8)
significant = corr_deseason_winter.where(abs(corr_deseason_winter) > siglev)

significant[parametro].plot.contourf(colors='none',
                                     hatches=['///'],
                                     transform=ccrs.PlateCarree(),
                                     add_colorbar=False)
Example #17
def boot_corr_signif(a1, a2, conf, dim="Model", nboots=1000, seed=None):
    """ Output significance values for the correlation between a 1 dimensional array a1
    and a multi-dimensional array a2

    Input:
        a1 = first array
        a2 = second array
        conf = the confidence interval you want e.g., 95 for 95% ci
        dim = the dimension over which to perform the correlation

    Output:
        signifdat = an array of dimensions a2 minus dim that contains
        1's for grid points where the correlation is significant and 0's otherwise
 
    This assumes a two sided test.
    """

    samplesize = a1[dim].size

    ptilemin = (100. - conf) / 2.
    ptilemax = conf + (100 - conf) / 2.

    # set up the dimensions and coordinates of the output
    dims = a2.dims
    signifcoords = []
    dimboot2d = [samplesize, nboots]
    for icoord in range(1, len(dims)):
        signifcoords.append((dims[icoord], a2[dims[icoord]]))
        dimboot2d.append(a2[dims[icoord]].size)

    if a1[dim].size != a2[dim].size:
        raise ValueError("The two arrays must have the same size")

    if seed is not None:
        np.random.seed(seed)

    ranu = np.random.uniform(0, samplesize, nboots * samplesize)
    ranu = np.floor(ranu).astype(int)

    bootdat1 = np.array(a1[ranu])
    bootdat2 = np.array(a2[ranu])

    bootdat1 = bootdat1.reshape([samplesize, nboots])
    bootdat2 = bootdat2.reshape(dimboot2d)
    bootcor = xr.corr(xr.DataArray(bootdat1),
                      xr.DataArray(bootdat2),
                      dim="dim_0")

    minci = np.percentile(bootcor, ptilemin, axis=0)
    maxci = np.percentile(bootcor, ptilemax, axis=0)

    signifdat = np.where(np.logical_or(minci > 0, maxci < 0), 1, 0)
    signifdat = xr.DataArray(signifdat, coords=signifcoords)

    return signifdat
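# A usage sketch for boot_corr_signif: a synthetic 30-member index correlated
# with a lat/lon field; all names, sizes, and the seed are illustrative.
import numpy as np
import xarray as xr

rng = np.random.default_rng(2)
nmod, nlat, nlon = 30, 5, 8
index = xr.DataArray(rng.standard_normal(nmod), dims="Model",
                     coords={"Model": np.arange(nmod)})
pattern = xr.DataArray(rng.standard_normal((nlat, nlon)), dims=("lat", "lon"),
                       coords={"lat": np.arange(nlat), "lon": np.arange(nlon)})
noise = xr.DataArray(rng.standard_normal((nmod, nlat, nlon)),
                     dims=("Model", "lat", "lon"),
                     coords={"Model": np.arange(nmod),
                             "lat": np.arange(nlat),
                             "lon": np.arange(nlon)})
field = index * pattern + 0.5 * noise  # dims: (Model, lat, lon)

sig = boot_corr_signif(index, field, conf=95, nboots=500, seed=3)
print(int(sig.sum()))  # number of grid points flagged as significant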


Example #18
plt.tight_layout()
plt.savefig('figs/Figure4.png',dpi=300)
plt.savefig('figs/vector/Figure4.eps')
plt.savefig('figs/vector/Figure4.pdf')
plt.show()

try:
    plt.savefig('figs/Figure4.jpeg',dpi=300)
except Exception:
    pass
plt.show()


# Check correlation between new prod and sst
# %%
ev=[ep_dates,cp_dates,nina_dates,info.index,neutral]
for e in ev:
    sst_corr=sst.sel(time=e,method='nearest').sel(time=slice(np.datetime64('1997-09-01'),np.datetime64('2020-01-01')))#.mean(dim='time')-sst.sel(time=all_dates,method='nearest').mean(dim='time')
    avg_npp_corr=avg_npp.sel(time=e,method='nearest').sel(lat=slice(-15,15))#.mean(dim='time')-avg_npp.sel(time=all_dates,method='nearest').mean(dim='time')).sel(lat=slice(-15,15))

    startday=np.datetime64('2000-01-01')
    endday=np.datetime64('2020-01-01') 
        
    sst_corr=sst_corr.sel(time=slice(startday,endday))
    avg_npp_corr=avg_npp_corr.sel(time=slice(startday,endday))
    
    c=xr.corr(sst_corr,avg_npp_corr,dim='time').mean().values
    print(c)
Example #19
def xr_pearson_r(a, b, dim=None, **kwargs):
    return xr.corr(a, b, dim)
Example #20
    actual = apply_ufunc(
        pandas_median,
        data_array.chunk({"x": 1}),
        input_core_dims=[["y"]],
        vectorize=True,
        dask="parallelized",
        output_dtypes=[int],
        meta=np.ndarray((0, 0), dtype=float),
    )

    assert_identical(expected, actual)
    assert actual.dtype == float


with raises_regex(TypeError, "Only xr.DataArray is supported"):
    xr.corr(xr.Dataset(), xr.Dataset())


def arrays_w_tuples():
    da = xr.DataArray(
        np.random.random((3, 21, 4)),
        coords={"time": pd.date_range("2000-01-01", freq="1D", periods=21)},
        dims=("a", "time", "x"),
    )

    arrays = [
        da.isel(time=range(0, 18)),
        da.isel(time=range(2, 20)).rolling(time=3, center=True).mean(),
        xr.DataArray([[1, 2], [1, np.nan]], dims=["x", "time"]),
        xr.DataArray([[1, 2], [np.nan, np.nan]], dims=["x", "time"]),
    ]
Example #21
def xr_spearman_r(a, b, dim=None, **kwargs):
    return xr.corr(_rankdata(a, dim), _rankdata(b, dim), dim)
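# xr_spearman_r relies on a `_rankdata` helper that is not shown above. A
# hedged sketch, assuming it ranks values along `dim` (NaN-free inputs) via
# scipy.stats.rankdata:
import scipy.stats
import xarray as xr

def _rankdata(a, dim):
    # Spearman's r is Pearson's r computed on ranks along `dim`.
    return xr.apply_ufunc(
        scipy.stats.rankdata,
        a,
        input_core_dims=[[dim]],
        output_core_dims=[[dim]],
        kwargs={"axis": -1},
    )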
Example #22
def plot_corr(
    dem_da_sub,
    ifg_stack_sub,
    col_slices=[slice(None)],
    nbins=30,
    alpha=0.5,
    cor_thresh=0.4,
    cor_mean_sub=None,
):

    # cormean = sario.load("cor_stack_20190101_mean.tif")
    # cor_mean_sub = xr.DataArray(
    #     cormean, coords={"lat": ifg_stack_sub.lat, "lon": ifg_stack_sub.lon}
    # )
    # ifg_stack_sub[0].data[(cor_mean_sub > cor_thresh)].shape

    fig, axes = plt.subplots(4, len(col_slices), squeeze=False)
    for idx, col_slice in enumerate(col_slices):
        ax = axes[0, idx]
        dem_pixels = dem_da_sub[:, col_slice].stack(space=("lat", "lon"))
        ifg_pixels = ifg_stack_sub[:, :, col_slice].stack(space=("lat", "lon"))
        print("ifg pixels shape:", ifg_pixels.shape)

        # Option 1: get the correlation coefficients
        trendvals = xr.corr(dem_pixels, ifg_pixels, dim="space")
        bins = np.linspace(-1, 1, nbins)

        # # Option 2: fit a line, find the slope of the line
        # x, y = dem_da_sub.data, ifg_stack_sub.data
        # mask_na = np.logical_and(np.isnan(x), np.isnan(y))
        # pf = np.polyfit(x[~mask_na], y[~mask_na], 1)

        # ifg_mask = (
        #     cor_mean_sub < cor_thresh
        #     if (cor_thresh and cor_mean_sub is not None)
        #     else None
        # )
        # trendvals = xr.apply_ufunc(
        #     linear_trend,
        #     dem_pixels,
        #     ifg_pixels,
        #     ifg_mask,
        #     vectorize=True,
        #     input_core_dims=[
        #         ["space"],
        #         ["space"],
        #         ["space"],
        #     ],  # reduce along "space", leaving 1 per ifg
        # )
        # trendvals *= 10  # Go from cm/meter of slope to mm/meter
        # bins = nbins

        trendvals.plot.hist(ax=ax, bins=bins, alpha=alpha)

        # row 2: plot the phase vs elevation plot for one
        max_idx = np.abs(trendvals).argmax().item()
        max_rho = trendvals[max_idx].item()
        ax = axes[1, idx]
        ax.scatter(dem_pixels, ifg_pixels.isel(ifg_idx=max_idx).data.ravel())
        ax.set_title(r"$\rho = $" + "{:.2f}".format(max_rho))

        # row 3: plot the DEM
        ax = axes[2, idx]
        axim = ax.imshow(dem_da_sub[:, col_slice], cmap="gist_earth")
        fig.colorbar(axim, ax=ax)

        # row 4: plot the ifg with the strongest phase vs elevation trend
        ax = axes[3, idx]
        axim = ax.imshow(ifg_stack_sub[max_idx, :, col_slice])
        ax.set_title(f"ifg_idx = {max_idx}")
        fig.colorbar(axim, ax=ax)
Example #23
def visualize_synoptic_class_on_time_series(da_ts,
                                            path=climate_path,
                                            ax=None,
                                            leg_ncol=1,
                                            add_mm=False,
                                            leg_loc=1,
                                            second_da_ts=None,
                                            twin=None):
    import xarray as xr
    import matplotlib.pyplot as plt
    from aux_gps import replace_time_series_with_its_group
    time_dim = list(set(da_ts.dims))[0]
    assert xr.infer_freq(da_ts[time_dim]) == 'D'
    if ax is None:
        fig, ax = plt.subplots()
    # also calc the monthly means:
    if add_mm:
        da_ts_mm = replace_time_series_with_its_group(da_ts, grp='month')
        da_ts_mm.plot.line('k-.', ax=ax)
    if isinstance(da_ts, xr.Dataset):
        styles = ['r-', 'g-', 'b-']
        lns = []
        for i, st in enumerate(da_ts):
            lbl = st.upper()
            ln = da_ts[st].plot.line(styles[i],
                                     lw=2,
                                     ax=ax,
                                     zorder=20,
                                     label=lbl)
            lns.append(ln)
        da_ts = da_ts[st]
    else:
        # plot daily values:
        da_ts.plot.line('k-', lw=2, ax=ax, zorder=20)
    if second_da_ts is not None:
        # record the corr between second_da_ts and da_ts:
        corr_all = xr.corr(da_ts, second_da_ts).item()
        corr_oct = xr.corr(
            da_ts.sel(time=da_ts['time.month'] == 10),
            second_da_ts.sel(time=second_da_ts['time.month'] == 10)).item()
        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
        textstr = '\n'.join([
            'r_all = {:.2f}'.format(corr_all),
            'r_just_Oct = {:.2f}'.format(corr_oct)
        ])
        # textstr = 'r_all = {:.2f}'.format(corr)
        # place a text box in upper left in axes coords
        ax.text(0.05,
                0.95,
                textstr,
                transform=ax.transAxes,
                fontsize=14,
                verticalalignment='top',
                bbox=props)
        try:
            if second_da_ts.attrs['units'] == da_ts.attrs['units']:
                second_da_ts.plot.line('k--', lw=2, ax=ax, marker='o')
            else:
                twinx = ax.twinx()
                second_da_ts.plot.line('k--', lw=2, ax=twinx, marker='o')
                if twin is not None:
                    twinx.set_ylim(*twin)
        except KeyError:
            twinx = ax.twinx()
            second_da_ts.plot.line('k--', lw=2, ax=twinx, marker='o')
            if twin is not None:
                twinx.set_ylim(*twin)
    # ymin, ymax = ax.get_ylim()
    df = read_synoptic_classification(path, report=False)
    ind = da_ts.to_dataframe().index
    da_ts = align_synoptic_class_with_daily_dataset(da_ts)
    df = df.loc[ind]
    color_dict, edge_dict = choose_color_for_synoptic_classification()
    #    df['color'] = df['class'].map(color_dict)
    # monthly count of synoptics:
    month_counts = agg_month_count_syn_class(freq=False)
    min_year = da_ts[time_dim].min().dt.year.item()
    min_month = da_ts[time_dim].min().dt.month.item()
    max_year = da_ts[time_dim].max().dt.year.item()
    max_month = da_ts[time_dim].max().dt.month.item()
    min_dt = '{}-{}'.format(min_year, min_month)
    max_dt = '{}-{}'.format(max_year, max_month)
    month_counts = month_counts.sel(time=slice(min_dt, max_dt))
    # alternative count since we need not just monthly but by time slice:
    grp_dict = df.groupby('class').groups
    for key_class, key_ind in grp_dict.items():
        color = color_dict[key_class]
        edge_color = edge_dict[key_class]
        abbr = add_class_abbr(key_class)
        # abbr_count = month_counts.sel(syn_cls=key_class).sum().item()
        abbr_count = df[df['class'] == key_class].count().values[0]
        abbr_label = r'${{{}}}$: {}'.format(abbr, int(abbr_count))
        #    for ind, row in df.iterrows():
        da_ts[da_ts['syn_class'] == key_class].plot.line(
            'k-',
            lw=0,
            ax=ax,
            marker='o',
            markersize=20,
            markerfacecolor=color,
            markeredgewidth=2,
            markeredgecolor=edge_color,
            label=abbr_label)
        # ax.vlines(key_ind, 0, 80, colors=color, alpha=0.4, lw=10,
        #           label=abbr_label)
    ax.legend(ncol=leg_ncol, labelspacing=1.5, fontsize=12, loc=leg_loc)
    ax.grid()
    return ax
Example #24
umpd['time'] = obs.time
mpd['time'] = obs.time
umpdcor['time'] = obs.time
mpdcor['time'] = obs.time

G_umpd = (umpd.tasLut.std(dim='time') / umpdclim) / (obs.Tair.std(dim='time') /
                                                     obsclim)
G_mpd = (mpd.tasLut.std(dim='time') / mpdclim) / (obs.Tair.std(dim='time') /
                                                  obsclim)
G_umpdcor = (umpdcor.tasLut.std(dim='time') /
             umpdcorclim) / (obs.Tair.std(dim='time') / obsclim)
G_mpdcor = (mpdcor.tasLut.std(dim='time') /
            mpdcorclim) / (obs.Tair.std(dim='time') / obsclim)

P_umpd = xr.corr(umpd.tasLut, obs.Tair, dim='time')
P_mpd = xr.corr(mpd.tasLut, obs.Tair, dim='time')
P_umpdcor = xr.corr(umpdcor.tasLut, obs.Tair, dim='time')
P_mpdcor = xr.corr(mpdcor.tasLut, obs.Tair, dim='time')

kge_umpd = (1 - ((P_umpd - 1)**2 + (B_umpd - 1)**2 +
                 (G_umpd - 1)**2)**(0.5)).rename('kge')
kge_mpd = (1 - ((P_mpd - 1)**2 + (B_mpd - 1)**2 +
                (G_mpd - 1)**2)**(0.5)).rename('kge')
kge_umpdcor = (1 - ((P_umpdcor - 1)**2 + (B_umpdcor - 1)**2 +
                    (G_umpdcor - 1)**2)**(0.5)).rename('kge')
kge_mpdcor = (1 - ((P_mpdcor - 1)**2 + (B_mpdcor - 1)**2 +
                   (G_mpdcor - 1)**2)**(0.5)).rename('kge')

kge_umpd.to_netcdf('../output/kge_umpd_laf_' + model + '.nc')
kge_umpdcor.to_netcdf('../output/kge_umpdcor_laf_' + model + '.nc')
    lons_boxd=[245,245,280,280]
    lats_boxe=[10,14,14,10]
    lons_boxe=[150,150,260,260]
    lats_boxf=[-14,-10,-10,-14]
    lons_boxf=[150,150,240,240]

    draw_screen_poly(lats_boxa,lons_boxa,m)
    draw_screen_poly(lats_boxb,lons_boxb,m)
    draw_screen_poly(lats_boxc,lons_boxc,m)
    draw_screen_poly(lats_boxd,lons_boxd,m)
    draw_screen_poly(lats_boxe,lons_boxe,m)
    draw_screen_poly(lats_boxf,lons_boxf,m)


ws1d=xr.open_dataset('processed/ws_1deg.nc').windspeed
corr=xr.corr(pco2,ws1d,dim='time')
#corr=xr.corr(co2,newprod,dim='time')

fig=plt.figure(figsize=(20,15))
prec_dt=prec_1d.precip.copy(data=detrend(prec_1d.precip.fillna(-999)))
ws1d_dt=ws1d.copy(data=detrend(ws1d.fillna(-999)))


# NEW PRODUCTION AND PRECIPITATION
corr=xr.corr(newprod,prec_1d.precip,dim='time')
#pv=get_spatial_trends(newprod,ws1d.sel(time=slice(startday,endday),lat=slice(-15,15))).pval
ax2=fig.add_subplot(8,2,1)
m=plot_basemap()
lo,la=np.meshgrid(newprod.lon.values,newprod.lat.values)
lo1,la1=m(lo,la)
m.contourf(lo1,la1,corr,cmap='bwr',levels=np.arange(-1,1.1,0.25))#,levels=np.arange(-0.001,0.0012,0.0002))