Example #1
0
def split_region_by_lonlat(prec_labels,
                           label=int,
                           plot_s=0,
                           plot_l=0,
                           kwrgs_mask_latlon=None):
    """Give part of one labelled region a new label, selected by lat/lon.

    For every (split, lag) slice, the grid cells that carry `label` and that
    `xrmask_by_latlon` masks out (sets to NaN) for at least one of the
    constraints in `kwrgs_mask_latlon` are relabelled to
    ``max(existing labels) + 1``.

    Parameters
    ----------
    prec_labels : xarray.DataArray-like
        Label field with `split` and `lag` coordinates. Its `.values` array
        is indexed as ``[split, lag]``, so those must be the first two
        dimensions. NaN marks cells without a label.
    label : int
        The label to split. The default (the `int` type itself) is kept for
        backward compatibility only; callers should pass the label.
    plot_s, plot_l : int
        Positional split / lag index of the slice that is plotted before and
        after the relabelling.
    kwrgs_mask_latlon : dict, optional
        Maps a keyword of `xrmask_by_latlon` to its value; each item is
        applied as ``xrmask_by_latlon(mask, **{key: value})``. All items are
        combined. `None` or an empty dict leaves the labels unchanged.

    Returns
    -------
    (copy_labels, new_label)
        A relabelled copy of `prec_labels` and the value of the new label.

    Raises
    ------
    ValueError
        If `prec_labels` contains no non-NaN value (``max`` of an empty
        sequence).
    """
    from itertools import product

    # A shared ``{}`` default would be one object reused across calls.
    if kwrgs_mask_latlon is None:
        kwrgs_mask_latlon = {}

    # before:
    plot_maps.plot_labels(prec_labels.isel(split=plot_s, lag=plot_l))
    splits = list(prec_labels.split.values)
    lags = list(prec_labels.lag.values)
    copy_labels = prec_labels.copy()
    np_labels = copy_labels.values
    orig_labels = np.unique(prec_labels.values[~np.isnan(prec_labels.values)])
    new_label = max(orig_labels) + 1
    print(f'\nNew label will become {new_label}')
    if max(orig_labels) >= 20:
        print('\nwarning, more then (or equal to) 20 regions')

    for (i_s, s), (i_l, l) in product(enumerate(splits), enumerate(lags)):
        single = copy_labels.sel(split=s, lag=l)
        # True where this slice carries the label that has to be split.
        orig_mask_label = ~np.isnan(single.where(single.values == label))

        # Union of the cells masked out by each constraint. Previously every
        # iteration overwrote the result of the one before, so only the last
        # constraint was honoured, and an empty dict left the mask undefined.
        masked_out = None
        for key, mask_latlon in kwrgs_mask_latlon.items():
            # presumably xrmask_by_latlon returns its input with NaN outside
            # the requested lat/lon selection -- confirm against its source.
            is_nan = np.isnan(
                xrmask_by_latlon(orig_mask_label, **{str(key): mask_latlon}))
            if masked_out is None:
                masked_out = is_nan
            else:
                masked_out = np.logical_or(masked_out, is_nan)
        if masked_out is None:
            # No constraint given: nothing to split off in this slice.
            continue

        mask_label = np.logical_and(masked_out, orig_mask_label)
        # assign new label
        single.values[mask_label.values] = new_label
        np_labels[i_s, i_l] = single.values
    copy_labels.values = np_labels
    # after
    plot_maps.plot_labels(copy_labels.isel(split=plot_s, lag=plot_l))
    return copy_labels, new_label
Example #2
0
def check(rg, list_of_name_path, cluster_nr):
    """Draw quick-look figures of the input data and the clustering result.

    Parameters
    ----------
    rg : object
        Must provide ``get_clust(format_lon=...)`` returning a mapping with an
        ``'xrclustered'`` entry.
    list_of_name_path : sequence
        Entries are indexed as ``entry[1]`` to obtain a file path. Entry 0 is
        the clustered target (must have a `cluster` coordinate and a `ts`
        variable); entries 1 and 2 are the two fields plotted below.
    cluster_nr
        Value of the `cluster` coordinate whose time series is plotted.

    Returns
    -------
    None
        Figures are created as a side effect only.
    """
    import matplotlib.pyplot as plt
    import core_pp

    def _load(position):
        # Lazily open the dataset stored at the path of the given entry.
        return core_pp.import_ds_lazy(list_of_name_path[position][1],
                                      format_lon='west_east')

    target_cluster = _load(0).sel(cluster=cluster_nr)
    sst_field = _load(1)
    swvl_field = _load(2)

    # Time series of the selected cluster.
    plt.figure()
    target_cluster.ts.plot()

    # First element of each field, one figure per field (sst, then swvl).
    for field in (sst_field, swvl_field):
        plt.figure()
        field[0].plot()

    # Spatial view of the clusters, one column per `n_clusters` value.
    label_plot_options = {
        'col_dim': 'n_clusters',
        'title': 'Hierarchical Clustering',
        'cbar_tick_dict': {'labelsize': 8},
        'add_cfeature': 'BORDERS',
    }
    clustering = rg.get_clust(format_lon='west_east')
    plot_maps.plot_labels(clustering['xrclustered'],
                          kwrgs_plot=label_plot_options)
Example #3
0
def regrid_array(xr_or_filestr, to_grid, periodic=False):
    """Regrid a field and plot it before and after regridding.

    Parameters
    ----------
    xr_or_filestr : str or array-like
        Either a file path, which is opened with `core_pp.import_ds_lazy`,
        or an already loaded array that is passed on unchanged.
    to_grid
        Target grid, forwarded to `functions_pp.regrid_xarray`.
    periodic : bool
        Forwarded to `functions_pp.regrid_xarray`.

    Returns
    -------
    The regridded array returned by `functions_pp.regrid_xarray`.
    """
    import functions_pp

    # isinstance (instead of ``type(...) == str``) also accepts str subclasses.
    if isinstance(xr_or_filestr, str):
        xarray = core_pp.import_ds_lazy(xr_or_filestr)
        # Loaded from file: show the first element as a map, before and after.
        plot_maps.plot_corr_maps(xarray[0])
        xr_regrid = functions_pp.regrid_xarray(xarray,
                                               to_grid,
                                               periodic=periodic)
        plot_maps.plot_corr_maps(xr_regrid[0])
    else:
        # Passed in directly: plotted as a label field, before and after.
        plot_maps.plot_labels(xr_or_filestr)
        xr_regrid = functions_pp.regrid_xarray(xr_or_filestr,
                                               to_grid,
                                               periodic=periodic)
        plot_maps.plot_labels(xr_regrid)
        # NOTE(review): extra plot of the cells equal to the hard-coded value
        # 3 only; looks like a leftover diagnostic -- confirm it is wanted.
        plot_maps.plot_labels(xr_regrid.where(xr_regrid.values == 3))
    return xr_regrid
Example #4
0
                                                     [1, 4])
    mask_cl = np.isnan(mask_cl)
elif region == 'init':
    mask_cl_e = find_precursors.view_or_replace_labels(xrclustered.copy(), [3])
    mask_cl_e = make_country_mask.binary_erosion(~np.isnan(mask_cl_e))
    mask_cl_w = ~np.isnan(
        find_precursors.view_or_replace_labels(xrclustered.copy(), [1]))
    mask_cl = ~np.logical_or(mask_cl_w, mask_cl_e)

# Plot the clustered field with scatter points and save the figure as a PDF.
# `title` is a 1x1 array because it is handed over as the 'subtitles' entry.
title = np.array([['Clustered simultaneous high temperature events']])
# The plot options are passed as the second positional argument
# (presumably the `kwrgs_plot` parameter of plot_labels -- confirm).
fig = plot_maps.plot_labels(
    xrclustered, {
        'size': 3,
        'scatter': scatter,
        'zoomregion': selbox,
        'mask_xr': mask_cl,
        'x_ticks': np.arange(235, 310, 15),
        'y_ticks': np.arange(0, 61, 10),
        'add_cfeature': 'LAKES',
        'subtitles': title
    })  # np.isnan(mask_cl)
# White background for the figure and for its first axes.
fig.set_facecolor('white')
fig.axes[0].set_facecolor('white')
# File name is built from the 'hash' attribute of the clustering and `region`.
f_name = 'scatter_clusters_t2m_{}_{}'.format(xrclustered.attrs['hash'], region)
filepath = os.path.join(rg.path_outmain, f_name)
# plt.savefig writes pyplot's current figure (presumably `fig` -- confirm).
plt.savefig(filepath + '.pdf', bbox_inches='tight')

# Without scatter points
fig = plot_maps.plot_labels(
    xrclustered, {
        'size': 3,
Example #5
0
    sep = '\\' # Windows folder seperator
else:
    sep = '/' # Mac/Linux folder seperator

# Make the sibling 'RGCPD' and 'clustering' folders, and their common parent,
# importable.
curr_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) # script directory
# Parent of the script directory. os.path.dirname copes with the platform's
# path separator itself, so no manual split/join on a separator is needed.
main_dir = os.path.dirname(curr_dir)
print(main_dir)
RGCPD_func = os.path.join(main_dir, 'RGCPD')
cluster_func = os.path.join(main_dir, 'clustering')
# Check every path on its own; previously all three were appended based on a
# test of the first one only, which could add duplicates or skip a path.
for _path in (RGCPD_func, cluster_func, main_dir):
    if _path not in sys.path:
        sys.path.append(_path)

import clustering_spatial as cl
from RGCPD import RGCPD
import plot_maps
# Create an RGCPD instance with its default arguments and pre-process the
# precursor fields.
rg = RGCPD()

rg.pp_precursors()

# Bare attribute access: the value is not stored or used here
# (presumably left over from interactive inspection -- confirm).
rg.list_precur_pp


# Second item of the first entry in list_precur_pp is used as the file name.
var_filename = rg.list_precur_pp[0][1]
# Passed as `mask` to dendogram_clustering.
# NOTE(review): looks like a lon/lat bounding box -- confirm the order.
mask = [145.0, 230.0, 20.0, 50.0]
# Cluster once per `q` value with a fixed number of 3 clusters and plot each
# result. `xrclustered` and `results` keep the values of the last iteration.
for q in [85, 95]:
    xrclustered, results = cl.dendogram_clustering(var_filename, mask=mask, kwrgs_clust={'q':q, 'n_clusters':3})
    plot_maps.plot_labels(xrclustered)

Example #6
0
# Build the clustering mask: start from the binary-eroded `mask_US_CA`,
# set every remaining (non-NaN) cell to 1, then restrict it by lat/lon.
xr_mask = xarray.where(make_country_mask.binary_erosion(mask_US_CA))
# xr_mask =  xarray.where(make_country_mask.binary_erosion(np.nan_to_num(xr_mask)))
xr_mask.values[~np.isnan(xr_mask)] = 1
xr_mask = find_precursors.xrmask_by_latlon(xr_mask, upper_right=(270, 63))
# mask small Western US Island
xr_mask = find_precursors.xrmask_by_latlon(xr_mask, bottom_left=(228, 58))
# add Rocky mask
# Variable 'z_NON_CDM' divided by 9.81 (presumably geopotential converted to
# surface height in metres -- confirm against the orography file).
geo_surf_height = core_pp.import_ds_lazy(
    orography, var='z_NON_CDM', selbox=selbox) / 9.81
geo_surf_height = geo_surf_height.drop('time').drop('realization')
plot_maps.plot_corr_maps(geo_surf_height,
                         cmap=plt.cm.Oranges,
                         clevels=np.arange(0, 2600, 500))
# Keep only cells whose surface height is below this threshold
# (same unit as geo_surf_height).
max_height = 1500
mask_Rockies = geo_surf_height < max_height
plot_maps.plot_labels(mask_Rockies)
xr_mask = xr_mask.where(mask_Rockies)

plot_maps.plot_labels(xr_mask)

# In[9]:
# =============================================================================
# Clustering co-occurrence of anomalies for different tfreqs
# =============================================================================
# Settings for the clustering call that follows; t0 records the start time.
q = 66
tfreq = [5, 10, 15, 30]
n_clusters = [4, 5, 6, 7, 8, 9, 10]
from time import time
t0 = time()
xrclustered, results = cl.dendogram_clustering(var_filename,
                                               mask=xr_mask,
Example #7
0
#%%
import make_country_mask
# Build a mask of US cells inside `selbox`.
selbox = (225, 300, 20, 70)
xarray, Country = make_country_mask.create_mask(var_filename,
                                                kwrgs_load={'selbox': selbox},
                                                level='Countries')
# Cells whose country code equals the US code ...
mask_US = xarray.values == Country.US
lsm = core_pp.import_ds_lazy(LSM, selbox=selbox)
# ... and whose value in the LSM field exceeds 0.3
# (presumably a land-sea mask fraction -- confirm).
mask_US = np.logical_and(mask_US, (lsm > .3).values)
xr_mask = xarray.where(mask_US)
# Replace the country code by 1 inside the mask.
xr_mask.values[mask_US] = 1
# Successive lat/lon restrictions, each applied to the result of the previous.
xr_mask = xrmask_by_latlon(xr_mask, lonmin=237)
xr_mask = xrmask_by_latlon(xr_mask, lonmin=238, latmin=39)
xr_mask = xrmask_by_latlon(xr_mask, lonmin=239, latmin=38)
xr_mask = xrmask_by_latlon(xr_mask, lonmin=240, latmin=36)
plot_maps.plot_labels(xr_mask)

# In[9]:
# =============================================================================
# Clustering co-occurrence of anomalies
# =============================================================================
# Settings for the clustering call that follows; t0 records the start time.
q = [80, 85, 90, 95]
n_clusters = [2, 3, 4, 5, 6, 7, 8]
tfreq = 1
from time import time
t0 = time()
xrclustered, results = cl.dendogram_clustering(var_filename,
                                               mask=xr_mask,
                                               kwrgs_load={
                                                   'tfreq': tfreq,
                                                   'seldates':
        'cbar_vert': -0.1,
        'units': '',
        'map_proj': ccrs.PlateCarree(central_longitude=220),
        'y_ticks': False,
        'x_ticks': False,
        'subtitles': subtitles,
        'title': title,
        'title_fontdict': {
            'y': 1.2,
            'fontsize': 20
        },
        'col_dim': 'months',
        'row_dim': labels.dims[1]
    }

    fig = plot_maps.plot_labels(labels, kwrgs_plot=kwrgs_plot)

    fig_path = os.path.join(rg.path_outsub1, f_name + 'hor') + rg.figext

    plt.savefig(fig_path, bbox_inches='tight')
    # %%
    # vertical plot
    title = 'Clusters of \ncorrelating regions'
    f_name = 'labels_{}_a{}'.format(precur.name,
                                precur.alpha) + '_' + \
                                f'{experiment}_lag{corlags}_' + \
                                f'tf{precur_aggr}_{method}'
    subtitles = np.array([monthkeys]).reshape(-1, 1)
    kwrgs_plot = {
        'aspect': 2,
        'hspace': .3,
Example #9
0
# Compute the CPPA precursor fields for the given splits and lags.
CPPA_prec = func_CPPA.get_robust_precursors(precur_arr,
                                            RV,
                                            df_splits,
                                            lags_i=lags_i,
                                            kwrgs_CPPA=kwrgs_CPPA)
#%%
# Mask is True wherever the weight is zero (i.e. casts to False).
CPPA_prec['mask'] = ~CPPA_prec['weights'].astype(bool)
actor = func_CPPA.act('sst', CPPA_prec, precur_arr)
# Settings read by cluster_DBSCAN_regions.
actor.distance_eps = 300
actor.min_area_in_degrees2 = 6
actor.group_split = 'together'
actor = find_precursors.cluster_DBSCAN_regions(actor)
# Keep an untouched copy of the labels before any region is split.
actor.original_precur_labels = actor.prec_labels.copy()
# Only plot when at least one grid cell carries a label.
if not np.isnan(actor.prec_labels.values).all():
    plot_maps.plot_labels(actor.prec_labels.copy())
    plt.show()
# splitting label that combines Pacific and Atlantic
kwrgs_mask_latlon = {'upper_right': (274, 10), 'lonmax': 283}
# NOTE(review): `trialplot` must be a parameter of the installed
# find_precursors.split_region_by_lonlat -- confirm, otherwise this raises
# TypeError.
prec_labels, new_label = find_precursors.split_region_by_lonlat(
    actor.prec_labels.copy(),
    label=8,
    trialplot=False,
    plot_s=4,
    kwrgs_mask_latlon=kwrgs_mask_latlon)
#kwrgs_mask_latlon = {'upper_right': (274, 11)}
#prec_labels, new_label = find_precursors.split_region_by_lonlat(prec_labels,
#                                     label=8,
#                                     trialplot=False,
#                                     plot_s=4,
#                                     kwrgs_mask_latlon=kwrgs_mask_latlon)
Example #10
0
def process(rg, lags, fold_method, crossyr):
    """Run the precursor pipeline on `rg` and name the lags after months.

    Steps, in order: pre-process the precursors, plot the full time series,
    define the train/test folds, save a plot of the target cluster, compute
    and plot the correlation maps, cluster the correlated regions, rename
    the `lag` coordinate of every precursor to month names, save overview
    plots and extract the precursor time series.

    Parameters
    ----------
    rg : object
        Analysis object; it is modified in place and returned.
    lags : iterable
        Each item is indexed as ``item[0]`` to obtain a date string from
        which a two-digit month number is taken (see `crossyr`).
    fold_method
        Forwarded to ``rg.traintest(method=...)``.
    crossyr : bool
        If true, the month is the two characters after the first ``'-'`` in
        the date string; otherwise it is the first two characters.

    Returns
    -------
    rg
        The same object that was passed in.

    Raises
    ------
    KeyError
        If the extracted month string is not one of ``'01'`` .. ``'12'``.
    """
    import find_precursors
    import plot_maps

    # Preprocess precursors
    rg.pp_precursors(detrend=True,
                     anomaly=True,
                     selbox=None,
                     format_lon='west_east')

    # ts plot
    rg.df_fullts.plot()

    # define train and test periods
    rg.traintest(method=fold_method, seed=1)
    testyrs = rg._get_testyrs()
    print(testyrs)

    # save target region plot
    target_cluster = int(rg.list_of_name_path[0][0])
    xrclustered = rg.get_clust(format_lon='west_east')['xrclustered']
    fig = plot_maps.plot_labels(
        find_precursors.view_or_replace_labels(xrclustered,
                                               regions=target_cluster))
    # f-string: previously the `f` prefix was missing, so every run wrote to
    # the literal file name 'target_cluster_{target_cluster}.jpeg'.
    fig.savefig(
        os.path.join(rg.path_outsub1, f'target_cluster_{target_cluster}.jpeg'))

    # calculate correlation maps
    rg.calc_corr_maps()

    # show correlation maps
    rg.plot_maps_corr(kwrgs_plot={'clevels': np.arange(-.6, .61, 0.1)})

    # cluster the correlated grid cells into precursor regions
    rg.cluster_list_MI()

    # define period names
    period_dict = {
        '01': 'January',
        '02': 'February',
        '03': 'March',
        '04': 'April',
        '05': 'May',
        '06': 'June',
        '07': 'July',
        '08': 'August',
        '09': 'September',
        '10': 'October',
        '11': 'November',
        '12': 'December'
    }
    periodnames = []
    for lag in lags:
        date_str = lag[0]
        # With crossyr the month follows the first '-'; otherwise the string
        # starts with the month.
        start = date_str.find("-") + 1 if crossyr else 0
        periodnames.append(period_dict[date_str[start:start + 2]])

    for precur in rg.list_for_MI:
        precur.prec_labels['lag'] = ('lag', periodnames)
        precur.corr_xr['lag'] = ('lag', periodnames)

    # View correlation regions
    rg.quick_view_labels(mean=True, save=True)
    rg.plot_maps_corr(save=True)

    # Handle precursor regions
    rg.get_ts_prec()
    count = rg._df_count  # how many times is each precursor regions found in the different training sets
    print(count)

    # center lat,lon coordinates and size (in number of gridcells);
    # the result is not used further, the call is kept as in the original.
    df_prec_regions = find_precursors.labels_to_df(
        rg.list_for_MI[0].prec_labels)

    return rg
Example #11
0
                    text = f'{int(RB[q]*n_spl)}/{count}'
                    temp.append([
                        lon + 10, lat + 5, text, {
                            'fontsize': 15,
                            'bbox': dict(facecolor='white', alpha=0.8)
                        }
                    ])
                textinmap.append([(i, 0), temp])

        mask = (np.isnan(CDlabels)).astype(bool)
        if ip == 0:
            kwrgs_plot = kwrgs_plotcorr_sst.copy()
        elif ip == 1:
            kwrgs_plot = kwrgs_plotcorr_SM.copy()
        # labels plot
        plot_maps.plot_labels(CDlabels.mean(dim='split'),
                              kwrgs_plot=kwrgs_plot)
        if save:
            if method == 'pcmci':
                dirpath = rg.path_outsub2
            else:
                dirpath = rg.path_outsub1
            plt.savefig(os.path.join(
                dirpath, f'{precur.name}_eps{precur.distance_eps}'
                f'minarea{precur.min_area_in_degrees2}_aCI{alpha_CI}_labels' +
                rg.figext),
                        bbox_inches='tight')

        # MCI values plot
        kwrgs_plot.update({
            'clevels': np.arange(-0.8, 0.9, .2),
            'textinmap': textinmap
    # Horizontal plot
    subtitles = np.array([monthkeys])

    title = 'Clusters of correlating regions [from $corr(SST_{t-1},$'+f'$\ T_t^{target[0].capitalize()})$]'
    kwrgs_plot = {'aspect':2, 'hspace':.35,
                  'wspace':-.32, 'size':2, 'cbar_vert':-0.1,
                  'units':'',
                  'map_proj':ccrs.PlateCarree(central_longitude=220),
                  'y_ticks':False,
                  'x_ticks':False,
                  'subtitles':subtitles,
                  'title':title,
                  'title_fontdict':{'y':1.2, 'fontsize':20},
                  'col_dim':'months', 'row_dim':labels.dims[1]}

    fig = plot_maps.plot_labels(labels, kwrgs_plot=kwrgs_plot)

    fig_path = os.path.join(rg.path_outsub1, f_name+'hor')+rg.figext

    plt.savefig(fig_path, bbox_inches='tight')
    # %%
    # vertical plot
    title = 'Clusters of \ncorrelating regions'
    f_name = 'labels_{}_a{}'.format(precur.name,
                                precur.alpha) + '_' + \
                                f'{experiment}_lag{corlags}_' + \
                                f'tf{precur_aggr}_{method}'
    subtitles = np.array([monthkeys]).reshape(-1,1)
    kwrgs_plot = {'aspect':2, 'hspace':.3,
                  'wspace':-.4, 'size':2, 'cbar_vert':0.06,
                  'units':'Corr. Coeff. [-]',
Example #13
0
def plot_regions(rg, save, plot_parcorr=False):
    """Plot, per precursor, the significant regions and their values.

    For every precursor in ``rg.list_for_MI`` two figures are made: the
    labels of the regions that are significant in at least one column of
    ``rg.df_pvals``, and a map of their values with a text box
    ``'<n significant>/<count>'`` next to each region.

    Besides its arguments the function reads the module-level names
    `alpha_CI`, `method`, `kwrgs_plotcorr_sst` and `kwrgs_plotcorr_SM`.

    Parameters
    ----------
    rg : object
        Must provide `df_pvals`, `df_corr`, `list_for_MI`, `_df_count`,
        `figext` and the output folders `path_outsub1` / `path_outsub2`.
        Index keys of `df_pvals` are split on ``'..'``: the second part is
        the region label, the last part contains the precursor name.
    save : bool
        If true, both figures are written to disk.
    plot_parcorr : bool
        If true, the value map shows the mean of ``rg.df_corr`` per region;
        otherwise it shows the precursor's own correlation map.

    Returns
    -------
    None
    """
    # Get ConDepKeys
    df_pvals = rg.df_pvals.copy()
    df_corr = rg.df_corr.copy()
    periodnames = list(rg.list_for_MI[0].corr_xr.lag.values)

    # Per month: (key, mean corr value, number of significant columns) for
    # every key that is significant in at least one column.
    CondDepKeys = {}
    for mon in periodnames:
        list_mon = []
        _keys = [k for k in df_pvals.index if mon in k]  # month
        df_sig = df_pvals[df_pvals.loc[_keys] <= alpha_CI].dropna(
            axis=0, how='all')  # significant

        for k in df_sig.index:
            corr_val = df_corr.loc[k].mean()
            RB = (df_pvals.loc[k] < alpha_CI).sum()
            list_mon.append((k, corr_val, RB))
        CondDepKeys[mon] = list_mon

    for ip, precur in enumerate(rg.list_for_MI):
        # ip=0; precur = rg.list_for_MI[ip]

        CDlabels = precur.prec_labels.copy()

        if precur.group_lag:
            # One label field for all lags: repeat it once per period.
            CDlabels = xr.concat([CDlabels] * len(periodnames), dim='lag')
            CDlabels['lag'] = ('lag', periodnames)
            CDcorr = precur.corr_xr_.copy()
        else:
            CDcorr = precur.corr_xr.copy()
        textinmap = []
        MCIstr = CDlabels.copy()
        for i, month in enumerate(CondDepKeys):

            # Entries of this month that belong to the current precursor.
            matches = [
                k for k in CondDepKeys[month]
                if precur.name in k[0].split('..')[-1]
            ]
            CDkeys = [k[0] for k in matches]
            MCIv = [k[1] for k in matches]
            RB = [k[2] for k in matches]
            region_labels = [int(l.split('..')[1]) for l in CDkeys]
            f = find_precursors.view_or_replace_labels
            if len(CDkeys) != 0:
                if region_labels[0] == 0:  # pattern cov
                    # Label 0 stands for the whole pattern: expand it to all
                    # labels present in this lag.
                    region_labels = np.unique(
                        CDlabels[:,
                                 i].values[~np.isnan(CDlabels[:, i]).values])
                    region_labels = np.array(region_labels, dtype=int)
                    MCIv = np.repeat(MCIv, len(region_labels))
                    CDkeys = [
                        CDkeys[0].replace('..0..', f'..{r}..')
                        for r in region_labels
                    ]
            CDlabels[:, i] = f(CDlabels[:, i].copy(), region_labels)
            if plot_parcorr:
                MCIstr[:, i] = f(CDlabels[:, i].copy(),
                                 region_labels,
                                 replacement_labels=MCIv)
            else:
                MCIstr[:, i] = CDcorr[:, i].copy()

            # get text on robustness:
            if len(CDkeys) != 0:
                temp = []
                df_labelloc = find_precursors.labels_to_df(CDlabels[:, i])
                for q, k in enumerate(CDkeys):
                    l = int(k.split('..')[1])
                    if l == 0:  # pattern cov
                        lat, lon = df_labelloc.mean(0)[:2]
                    else:
                        lat, lon = df_labelloc.loc[l].iloc[:2].values.round(1)
                    # Wrap longitudes above 180 to the -180..180 range.
                    # Previously the subtraction result was discarded.
                    if lon > 180:
                        lon -= 360
                    if precur.calc_ts != 'pattern cov':
                        count = rg._df_count[k]
                        text = f'{int(RB[q])}/{count}'
                        temp.append([
                            lon + 10, lat + 5, text, {
                                'fontsize': 15,
                                'bbox': dict(facecolor='white', alpha=0.8)
                            }
                        ])
                    elif precur.calc_ts == 'pattern cov' and q == 0:
                        # One text box for the whole pattern, placed at the
                        # mean of the coordinate axes.
                        count = rg._df_count[f'{month}..0..{precur.name}_sp']
                        text = f'{int(RB[0])}/{count}'
                        lon = float(CDlabels[:, i].longitude.mean())
                        lat = float(CDlabels[:, i].latitude.mean())
                        temp.append([
                            lon, lat, text, {
                                'fontsize': 15,
                                'bbox': dict(facecolor='white', alpha=0.8)
                            }
                        ])
                textinmap.append([(i, 0), temp])

        # NOTE(review): only the first two precursors get their own plot
        # settings; for ip >= 2 `kwrgs_plot` keeps its value from the
        # previous iteration -- confirm this is intended.
        if ip == 0:
            kwrgs_plot = kwrgs_plotcorr_sst.copy()
        elif ip == 1:
            kwrgs_plot = kwrgs_plotcorr_SM.copy()
        # labels plot
        plot_maps.plot_labels(CDlabels.mean(dim='split'),
                              kwrgs_plot=kwrgs_plot)
        if save:
            if method == 'pcmci':
                dirpath = rg.path_outsub2
            else:
                dirpath = rg.path_outsub1
            plt.savefig(os.path.join(
                dirpath, f'{precur.name}_eps{precur.distance_eps}'
                f'minarea{precur.min_area_in_degrees2}_aCI{alpha_CI}_labels_'
                f'{periodnames[-1]}' + rg.figext),
                        bbox_inches='tight')

        # MCI values plot
        # True where at least one split has a label.
        mask_xr = np.isnan(CDlabels).mean(dim='split') < 1.
        kwrgs_plot.update({
            'clevels': np.arange(-0.8, 0.9, .1),
            'textinmap': textinmap
        })
        fig = plot_maps.plot_corr_maps(MCIstr.where(mask_xr).mean(dim='split'),
                                       mask_xr=mask_xr,
                                       **kwrgs_plot)
        if save:
            fig.savefig(os.path.join(
                dirpath, f'{precur.name}_eps{precur.distance_eps}'
                f'minarea{precur.min_area_in_degrees2}_aCI{alpha_CI}_MCI_'
                f'{periodnames[-1]}' + rg.figext),
                        bbox_inches='tight')
Example #14
0
# Overview plot of all clusterings: one panel per combination of the
# 'n_clusters' (rows) and 'linkage' (columns) dimensions.
# r = np.meshgrid(xrclusteredall.n_clusters.astype(str).values)
# String version of the n_clusters coordinate, used in the subtitles below.
r = xrclusteredall.n_clusters.astype(str).values
# subtitles = [f'n-clusters={r.flatten()[i]}, ' + f'random state={c.flatten()[i]}' for i in range(c.size)]
# One subtitle per n_clusters value.
# NOTE(review): `subtitles` is not passed to plot_labels below -- confirm
# whether it is used further down or is a leftover.
subtitles = [
    f'n-clusters={r.flatten()[i]}, linkage=ward, metric=euclidian'
    for i in range(r.size)
]
fig = plot_maps.plot_labels(xrclusteredall,
                            kwrgs_plot={
                                'wspace': .05,
                                'hspace': .17,
                                'cbar_vert': .045,
                                'row_dim': 'n_clusters',
                                'col_dim': 'linkage',
                                'zoomregion': selbox,
                                'cmap': cmp,
                                'x_ticks': np.array([260, 270, 280]),
                                'title': title,
                                'title_fontdict': {
                                    'y': .93,
                                    'fontsize': 18,
                                    'fontweight': 'bold'
                                }
                            })
for i, ax in enumerate(fig.axes[:-1]):
    np.isnan(xr_States).plot.contour(ax=ax,
                                     transform=plot_maps.ccrs.PlateCarree(),
                                     linestyles=['solid'],
                                     colors=['black'],
                                     linewidths=2,
                                     levels=[0, 1],