def split_region_by_lonlat(prec_labels, label=int, plot_s=0, plot_l=0,
                           kwrgs_mask_latlon=None):
    """Give part of one labelled region a new label, based on lat/lon masks.

    For every (split, lag) slice the cells carrying ``label`` are selected.
    Each entry of ``kwrgs_mask_latlon`` is passed, on its own, to
    ``xrmask_by_latlon`` together with that selection; the cells of the
    region for which the helper returns NaN receive the new label.
    The masks are not chained: every entry is applied to the original
    selection of the region.

    The slice ``(plot_s, plot_l)`` is plotted with ``plot_maps.plot_labels``
    before and after the relabelling.

    Parameters
    ----------
    prec_labels : xarray.DataArray
        Label array with ``split`` and ``lag`` coordinates. Results are
        written back with ``[i_split, i_lag]`` indexing, so ``split`` and
        ``lag`` are assumed to be the first two dimensions -- TODO confirm.
    label : int
        Label of the region to split. The default is the ``int`` type itself
        (a placeholder, not a usable label); callers are expected to pass a
        value.
    plot_s, plot_l : int
        Positional indices (``isel``) of the split and lag to plot.
    kwrgs_mask_latlon : dict, optional
        Maps keyword names of ``xrmask_by_latlon`` to their values.
        ``None`` (default) or an empty dict leaves all labels unchanged.

    Returns
    -------
    tuple
        ``(copy_labels, new_label)``: the relabelled copy of ``prec_labels``
        and the label that was assigned (highest existing label + 1).

    Raises
    ------
    ValueError
        If ``prec_labels`` holds no non-NaN label at all.
    """
    from itertools import product

    # Avoid a shared mutable default argument.
    if kwrgs_mask_latlon is None:
        kwrgs_mask_latlon = {}

    # before:
    plot_maps.plot_labels(prec_labels.isel(split=plot_s, lag=plot_l))

    splits = list(prec_labels.split.values)
    lags = list(prec_labels.lag.values)
    copy_labels = prec_labels.copy()
    np_labels = copy_labels.values
    orig_labels = np.unique(prec_labels.values[~np.isnan(prec_labels.values)])
    if orig_labels.size == 0:
        # max() would fail below with a far less helpful message.
        raise ValueError('prec_labels contains no labelled (non-NaN) cells, '
                         'so there is no region to split')

    highest_label = max(orig_labels)
    new_label = highest_label + 1
    print(f'\nNew label will become {new_label}')
    if highest_label >= 20:
        print('\nwarning, more then (or equal to) 20 regions')

    for (i_s, s), (i_l, lag) in product(enumerate(splits), enumerate(lags)):
        single = copy_labels.sel(split=s, lag=lag)
        # True where this slice carries the label that must be split.
        orig_mask_label = ~np.isnan(single.where(single.values == label))
        for key, mask_latlon in kwrgs_mask_latlon.items():
            mask_label = xrmask_by_latlon(orig_mask_label,
                                          **{str(key): mask_latlon})
            # Cells of the region for which the helper returned NaN.
            mask_label = np.logical_and(np.isnan(mask_label), orig_mask_label)
            # assign new label
            single.values[mask_label.values] = new_label
        np_labels[i_s, i_l] = single.values
    copy_labels.values = np_labels

    # after
    plot_maps.plot_labels(copy_labels.isel(split=plot_s, lag=plot_l))
    return copy_labels, new_label
def check(rg, list_of_name_path, cluster_nr):
    """Draw quick-look figures of the input data and of the clusters.

    Loads the datasets found at ``list_of_name_path[0..2][1]`` with
    ``core_pp.import_ds_lazy(..., format_lon='west_east')`` and plots:

    * the ``ts`` variable of the first dataset for ``cluster=cluster_nr``,
    * the first element of the second dataset (sst, per the file's naming),
    * the first element of the third dataset (swvl, per the file's naming),
    * the clustered regions returned by ``rg.get_clust``.

    Parameters
    ----------
    rg : object
        Must provide ``get_clust(format_lon=...)`` returning a mapping with
        the key ``'xrclustered'``.
    list_of_name_path : sequence
        At least three entries; element ``[1]`` of each entry is the path.
    cluster_nr
        Value used to select along the ``cluster`` coordinate.

    Returns
    -------
    None
    """
    import matplotlib.pyplot as plt
    import core_pp

    def _load(position):
        # Element [1] of every entry is the path of the dataset.
        return core_pp.import_ds_lazy(list_of_name_path[position][1],
                                      format_lon='west_east')

    t2m_cluster = _load(0).sel(cluster=cluster_nr)
    sst_ds = _load(1)
    swvl_ds = _load(2)

    # time series plot for the selected cluster
    plt.figure()
    t2m_cluster.ts.plot()

    # check plots for sst and swvl, each in its own figure
    for field in (sst_ds, swvl_ds):
        plt.figure()
        field[0].plot()

    # Check plot of clusters: if TVpath contains the xr.DataArray that was
    # clustered beforehand, this shows the spatial regions.
    clustered = rg.get_clust(format_lon='west_east')['xrclustered']
    plot_options = {
        'col_dim': 'n_clusters',
        'title': 'Hierarchical Clustering',
        'cbar_tick_dict': {'labelsize': 8},
        'add_cfeature': 'BORDERS',
    }
    plot_maps.plot_labels(clustered, kwrgs_plot=plot_options)
def regrid_array(xr_or_filestr, to_grid, periodic=False):
    """Regrid a field or a label map and plot it before and after.

    Parameters
    ----------
    xr_or_filestr : str or xarray object
        A path, which is loaded with ``core_pp.import_ds_lazy`` and plotted
        with ``plot_maps.plot_corr_maps`` (first element only), or an
        already loaded array, which is plotted with
        ``plot_maps.plot_labels``.
    to_grid
        Target grid, forwarded unchanged to ``functions_pp.regrid_xarray``.
    periodic : bool, optional
        Forwarded unchanged to ``functions_pp.regrid_xarray``.

    Returns
    -------
    The regridded object returned by ``functions_pp.regrid_xarray``.
    """
    import functions_pp

    # isinstance (rather than an exact type comparison) also accepts
    # subclasses of str.
    if isinstance(xr_or_filestr, str):
        xarray = core_pp.import_ds_lazy(xr_or_filestr)
        plot_maps.plot_corr_maps(xarray[0])
        xr_regrid = functions_pp.regrid_xarray(xarray, to_grid,
                                               periodic=periodic)
        plot_maps.plot_corr_maps(xr_regrid[0])
    else:
        plot_maps.plot_labels(xr_or_filestr)
        xr_regrid = functions_pp.regrid_xarray(xr_or_filestr, to_grid,
                                               periodic=periodic)
        plot_maps.plot_labels(xr_regrid)
        # NOTE(review): hard-coded diagnostic plot of label 3 only; looks
        # like a debugging leftover -- confirm whether it is still wanted.
        plot_maps.plot_labels(xr_regrid.where(xr_regrid.values == 3))
    return xr_regrid
[1, 4]) mask_cl = np.isnan(mask_cl) elif region == 'init': mask_cl_e = find_precursors.view_or_replace_labels(xrclustered.copy(), [3]) mask_cl_e = make_country_mask.binary_erosion(~np.isnan(mask_cl_e)) mask_cl_w = ~np.isnan( find_precursors.view_or_replace_labels(xrclustered.copy(), [1])) mask_cl = ~np.logical_or(mask_cl_w, mask_cl_e) title = np.array([['Clustered simultaneous high temperature events']]) fig = plot_maps.plot_labels( xrclustered, { 'size': 3, 'scatter': scatter, 'zoomregion': selbox, 'mask_xr': mask_cl, 'x_ticks': np.arange(235, 310, 15), 'y_ticks': np.arange(0, 61, 10), 'add_cfeature': 'LAKES', 'subtitles': title }) # np.isnan(mask_cl) fig.set_facecolor('white') fig.axes[0].set_facecolor('white') f_name = 'scatter_clusters_t2m_{}_{}'.format(xrclustered.attrs['hash'], region) filepath = os.path.join(rg.path_outmain, f_name) plt.savefig(filepath + '.pdf', bbox_inches='tight') # Without scatter points fig = plot_maps.plot_labels( xrclustered, { 'size': 3,
sep = '\\' # Windows folder seperator else: sep = '/' # Mac/Linux folder seperator curr_dir = os.path.dirname(os.path.abspath(inspect.getfile(inspect.currentframe()))) # script directory main_dir = sep.join(curr_dir.split(sep)[:-1]) print(main_dir) RGCPD_func = os.path.join(main_dir, 'RGCPD') cluster_func = os.path.join(main_dir, 'clustering') if RGCPD_func not in sys.path: sys.path.append(RGCPD_func) sys.path.append(cluster_func) sys.path.append(main_dir) import clustering_spatial as cl from RGCPD import RGCPD import plot_maps rg = RGCPD() rg.pp_precursors() rg.list_precur_pp var_filename = rg.list_precur_pp[0][1] mask = [145.0, 230.0, 20.0, 50.0] for q in [85, 95]: xrclustered, results = cl.dendogram_clustering(var_filename, mask=mask, kwrgs_clust={'q':q, 'n_clusters':3}) plot_maps.plot_labels(xrclustered)
xr_mask = xarray.where(make_country_mask.binary_erosion(mask_US_CA)) # xr_mask = xarray.where(make_country_mask.binary_erosion(np.nan_to_num(xr_mask))) xr_mask.values[~np.isnan(xr_mask)] = 1 xr_mask = find_precursors.xrmask_by_latlon(xr_mask, upper_right=(270, 63)) # mask small Western US Island xr_mask = find_precursors.xrmask_by_latlon(xr_mask, bottom_left=(228, 58)) # add Rocky mask geo_surf_height = core_pp.import_ds_lazy( orography, var='z_NON_CDM', selbox=selbox) / 9.81 geo_surf_height = geo_surf_height.drop('time').drop('realization') plot_maps.plot_corr_maps(geo_surf_height, cmap=plt.cm.Oranges, clevels=np.arange(0, 2600, 500)) max_height = 1500 mask_Rockies = geo_surf_height < max_height plot_maps.plot_labels(mask_Rockies) xr_mask = xr_mask.where(mask_Rockies) plot_maps.plot_labels(xr_mask) # In[9]: # ============================================================================= # Clustering co-occurence of anomalies different tfreqs # ============================================================================= q = 66 tfreq = [5, 10, 15, 30] n_clusters = [4, 5, 6, 7, 8, 9, 10] from time import time t0 = time() xrclustered, results = cl.dendogram_clustering(var_filename, mask=xr_mask,
#%% import make_country_mask selbox = (225, 300, 20, 70) xarray, Country = make_country_mask.create_mask(var_filename, kwrgs_load={'selbox': selbox}, level='Countries') mask_US = xarray.values == Country.US lsm = core_pp.import_ds_lazy(LSM, selbox=selbox) mask_US = np.logical_and(mask_US, (lsm > .3).values) xr_mask = xarray.where(mask_US) xr_mask.values[mask_US] = 1 xr_mask = xrmask_by_latlon(xr_mask, lonmin=237) xr_mask = xrmask_by_latlon(xr_mask, lonmin=238, latmin=39) xr_mask = xrmask_by_latlon(xr_mask, lonmin=239, latmin=38) xr_mask = xrmask_by_latlon(xr_mask, lonmin=240, latmin=36) plot_maps.plot_labels(xr_mask) # In[9]: # ============================================================================= # Clustering co-occurence of anomalies # ============================================================================= q = [80, 85, 90, 95] n_clusters = [2, 3, 4, 5, 6, 7, 8] tfreq = 1 from time import time t0 = time() xrclustered, results = cl.dendogram_clustering(var_filename, mask=xr_mask, kwrgs_load={ 'tfreq': tfreq, 'seldates':
'cbar_vert': -0.1, 'units': '', 'map_proj': ccrs.PlateCarree(central_longitude=220), 'y_ticks': False, 'x_ticks': False, 'subtitles': subtitles, 'title': title, 'title_fontdict': { 'y': 1.2, 'fontsize': 20 }, 'col_dim': 'months', 'row_dim': labels.dims[1] } fig = plot_maps.plot_labels(labels, kwrgs_plot=kwrgs_plot) fig_path = os.path.join(rg.path_outsub1, f_name + 'hor') + rg.figext plt.savefig(fig_path, bbox_inches='tight') # %% # vertical plot title = 'Clusters of \ncorrelating regions' f_name = 'labels_{}_a{}'.format(precur.name, precur.alpha) + '_' + \ f'{experiment}_lag{corlags}_' + \ f'tf{precur_aggr}_{method}' subtitles = np.array([monthkeys]).reshape(-1, 1) kwrgs_plot = { 'aspect': 2, 'hspace': .3,
CPPA_prec = func_CPPA.get_robust_precursors(precur_arr, RV, df_splits, lags_i=lags_i, kwrgs_CPPA=kwrgs_CPPA) #%% CPPA_prec['mask'] = ~CPPA_prec['weights'].astype(bool) actor = func_CPPA.act('sst', CPPA_prec, precur_arr) actor.distance_eps = 300 actor.min_area_in_degrees2 = 6 actor.group_split = 'together' actor = find_precursors.cluster_DBSCAN_regions(actor) actor.original_precur_labels = actor.prec_labels.copy() if np.isnan(actor.prec_labels.values).all() == False: plot_maps.plot_labels(actor.prec_labels.copy()) plt.show() # splitting label that combines Pacific and Atlantic kwrgs_mask_latlon = {'upper_right': (274, 10), 'lonmax': 283} prec_labels, new_label = find_precursors.split_region_by_lonlat( actor.prec_labels.copy(), label=8, trialplot=False, plot_s=4, kwrgs_mask_latlon=kwrgs_mask_latlon) #kwrgs_mask_latlon = {'upper_right': (274, 11)} #prec_labels, new_label = find_precursors.split_region_by_lonlat(prec_labels, # label=8, # trialplot=False, # plot_s=4, # kwrgs_mask_latlon=kwrgs_mask_latlon)
def process(rg, lags, fold_method, crossyr):
    """Run the precursor pipeline on ``rg`` and save the diagnostic plots.

    Steps, in order: preprocess the precursors, plot the full time series,
    define the train/test folds, save a map of the target cluster, compute
    and plot the correlation maps, rename the ``lag`` coordinate of every
    precursor to month names, save the label and correlation plots, and
    extract the precursor time series.

    Parameters
    ----------
    rg : object
        Analysis object; it is modified in place through its own methods.
    lags : iterable
        Each element's first item is a date string. With ``crossyr`` the
        month is read from the two characters after the first ``"-"``
        (presumably ``'YYYY-MM-DD'`` -- TODO confirm); otherwise from the
        first two characters (presumably ``'MM-DD'`` -- TODO confirm).
    fold_method
        Forwarded as ``method`` to ``rg.traintest``.
    crossyr : bool
        Selects how the month is parsed from ``lags`` (see above).

    Returns
    -------
    The same ``rg`` object that was passed in.

    Raises
    ------
    KeyError
        If the parsed month string is not one of ``'01'`` .. ``'12'``.
    """
    import find_precursors
    import plot_maps

    # Preprocess precursors
    rg.pp_precursors(detrend=True,
                     anomaly=True,
                     selbox=None,
                     format_lon='west_east')

    # NOTE: NaN values in the cluster time series are not replaced here;
    # an earlier attempt to set them to 0 was disabled.

    # ts plot
    rg.df_fullts.plot()

    # define train and test periods
    rg.traintest(method=fold_method, seed=1)
    testyrs = rg._get_testyrs()
    print(testyrs)

    # save target region plot
    target_cluster = int(rg.list_of_name_path[0][0])
    xrclustered = rg.get_clust(format_lon='west_east')['xrclustered']
    fig = plot_maps.plot_labels(
        find_precursors.view_or_replace_labels(xrclustered,
                                               regions=target_cluster))
    # The f-prefix was missing, so the file name used to contain the literal
    # text "{target_cluster}" instead of the cluster number.
    fig.savefig(
        os.path.join(rg.path_outsub1,
                     f'target_cluster_{target_cluster}.jpeg'))

    # calculate correlation maps
    rg.calc_corr_maps()

    # show correlation maps
    rg.plot_maps_corr(kwrgs_plot={'clevels': np.arange(-.6, .61, 0.1)})

    # define period names
    period_dict = {
        '01': 'January',
        '02': 'February',
        '03': 'March',
        '04': 'April',
        '05': 'May',
        '06': 'June',
        '07': 'July',
        '08': 'August',
        '09': 'September',
        '10': 'October',
        '11': 'November',
        '12': 'December',
    }
    periodnames = []
    for lag in lags:
        start = lag[0]
        if crossyr:
            # two characters following the first "-"
            after_dash = start.find("-") + 1
            month_nr_str = start[after_dash:after_dash + 2]
        else:
            # the string starts with the month number
            month_nr_str = start[:2]
        periodnames.append(period_dict[month_nr_str])

    # Label the lag coordinate of every precursor with the month names.
    for precur in rg.list_for_MI:
        precur.prec_labels['lag'] = ('lag', periodnames)
        precur.corr_xr['lag'] = ('lag', periodnames)

    # View correlation regions
    rg.quick_view_labels(mean=True, save=True)
    rg.plot_maps_corr(save=True)

    # Handle precursor regions
    rg.get_ts_prec()
    # how many times each precursor region is found in the different
    # training sets
    count = rg._df_count
    print(count)

    # Center lat, lon coordinates and size (in number of gridcells) of the
    # regions; computed for inspection only, the result is not used further.
    df_prec_regions = find_precursors.labels_to_df(
        rg.list_for_MI[0].prec_labels)

    return rg
text = f'{int(RB[q]*n_spl)}/{count}' temp.append([ lon + 10, lat + 5, text, { 'fontsize': 15, 'bbox': dict(facecolor='white', alpha=0.8) } ]) textinmap.append([(i, 0), temp]) mask = (np.isnan(CDlabels)).astype(bool) if ip == 0: kwrgs_plot = kwrgs_plotcorr_sst.copy() elif ip == 1: kwrgs_plot = kwrgs_plotcorr_SM.copy() # labels plot plot_maps.plot_labels(CDlabels.mean(dim='split'), kwrgs_plot=kwrgs_plot) if save: if method == 'pcmci': dirpath = rg.path_outsub2 else: dirpath = rg.path_outsub1 plt.savefig(os.path.join( dirpath, f'{precur.name}_eps{precur.distance_eps}' f'minarea{precur.min_area_in_degrees2}_aCI{alpha_CI}_labels' + rg.figext), bbox_inches='tight') # MCI values plot kwrgs_plot.update({ 'clevels': np.arange(-0.8, 0.9, .2), 'textinmap': textinmap
# Horizontal plot subtitles = np.array([monthkeys]) title = 'Clusters of correlating regions [from $corr(SST_{t-1},$'+f'$\ T_t^{target[0].capitalize()})$]' kwrgs_plot = {'aspect':2, 'hspace':.35, 'wspace':-.32, 'size':2, 'cbar_vert':-0.1, 'units':'', 'map_proj':ccrs.PlateCarree(central_longitude=220), 'y_ticks':False, 'x_ticks':False, 'subtitles':subtitles, 'title':title, 'title_fontdict':{'y':1.2, 'fontsize':20}, 'col_dim':'months', 'row_dim':labels.dims[1]} fig = plot_maps.plot_labels(labels, kwrgs_plot=kwrgs_plot) fig_path = os.path.join(rg.path_outsub1, f_name+'hor')+rg.figext plt.savefig(fig_path, bbox_inches='tight') # %% # vertical plot title = 'Clusters of \ncorrelating regions' f_name = 'labels_{}_a{}'.format(precur.name, precur.alpha) + '_' + \ f'{experiment}_lag{corlags}_' + \ f'tf{precur_aggr}_{method}' subtitles = np.array([monthkeys]).reshape(-1,1) kwrgs_plot = {'aspect':2, 'hspace':.3, 'wspace':-.4, 'size':2, 'cbar_vert':0.06, 'units':'Corr. Coeff. [-]',
def plot_regions(rg, save, plot_parcorr=False):
    """Plot, per precursor, the significant regions and their values.

    For every lag (period name) the keys of ``rg.df_pvals`` containing that
    name and having at least one p-value ``<= alpha_CI`` are collected
    together with their mean correlation and the number of p-values
    ``< alpha_CI``. For every precursor in ``rg.list_for_MI`` two figures are
    drawn: the retained labels (mean over ``split``) and a map of either the
    correlation field or, with ``plot_parcorr``, the per-region mean values,
    annotated with ``"<count significant>/<rg._df_count entry>"`` texts.

    Reads these names from module scope: ``alpha_CI``, ``method``,
    ``kwrgs_plotcorr_sst``, ``kwrgs_plotcorr_SM``, ``find_precursors``,
    ``plot_maps``, ``xr``, ``np``, ``plt`` and ``os``.

    Parameters
    ----------
    rg : object
        Analysis object providing ``df_pvals``, ``df_corr``, ``list_for_MI``,
        ``_df_count``, ``path_outsub1``, ``path_outsub2`` and ``figext``.
    save : bool
        When true, both figures are written to ``rg.path_outsub2`` if
        ``method == 'pcmci'`` and to ``rg.path_outsub1`` otherwise.
    plot_parcorr : bool, optional
        When true, the second map shows the per-key mean of ``rg.df_corr``
        painted onto the regions instead of the correlation field.

    Returns
    -------
    None

    Notes
    -----
    Keys are split on ``'..'``: the second part is parsed as the integer
    region label and the last part must contain ``precur.name``.
    """
    # Get ConDepKeys
    df_pvals = rg.df_pvals.copy()
    df_corr = rg.df_corr.copy()
    periodnames = list(rg.list_for_MI[0].corr_xr.lag.values)

    CondDepKeys = {}
    for mon in periodnames:
        _keys = [k for k in df_pvals.index if mon in k]  # month
        df_sig = df_pvals[df_pvals.loc[_keys] <= alpha_CI].dropna(
            axis=0, how='all')  # significant
        # NOTE(review): significance uses "<=" above, but the robustness
        # count below uses "<" -- confirm this difference is intended.
        CondDepKeys[mon] = [
            (k, df_corr.loc[k].mean(), (df_pvals.loc[k] < alpha_CI).sum())
            for k in df_sig.index
        ]

    f = find_precursors.view_or_replace_labels
    for ip, precur in enumerate(rg.list_for_MI):
        CDlabels = precur.prec_labels.copy()

        if precur.group_lag:
            # Repeat the single label map so that there is one per period.
            CDlabels = xr.concat([CDlabels] * len(periodnames), dim='lag')
            CDlabels['lag'] = ('lag', periodnames)
            CDcorr = precur.corr_xr_.copy()
        else:
            CDcorr = precur.corr_xr.copy()

        textinmap = []
        MCIstr = CDlabels.copy()
        for i, month in enumerate(CondDepKeys):
            # Entries of this month that belong to the current precursor.
            selected = [
                entry for entry in CondDepKeys[month]
                if precur.name in entry[0].split('..')[-1]
            ]
            CDkeys = [entry[0] for entry in selected]
            MCIv = [entry[1] for entry in selected]
            RB = [entry[2] for entry in selected]
            region_labels = [int(key.split('..')[1]) for key in CDkeys]

            if CDkeys:
                if region_labels[0] == 0:  # pattern cov
                    # A single key stands for all regions: expand it to
                    # every label present in this lag.
                    region_labels = np.unique(
                        CDlabels[:, i].values[~np.isnan(CDlabels[:,
                                                                 i]).values])
                    region_labels = np.array(region_labels, dtype=int)
                    MCIv = np.repeat(MCIv, len(region_labels))
                    CDkeys = [
                        CDkeys[0].replace('..0..', f'..{r}..')
                        for r in region_labels
                    ]
            CDlabels[:, i] = f(CDlabels[:, i].copy(), region_labels)
            if plot_parcorr:
                MCIstr[:, i] = f(CDlabels[:, i].copy(),
                                 region_labels,
                                 replacement_labels=MCIv)
            else:
                MCIstr[:, i] = CDcorr[:, i].copy()

            # get text on robustness:
            if CDkeys:
                temp = []
                df_labelloc = find_precursors.labels_to_df(CDlabels[:, i])
                for q, k in enumerate(CDkeys):
                    region = int(k.split('..')[1])
                    if region == 0:  # pattern cov
                        lat, lon = df_labelloc.mean(0)[:2]
                    else:
                        lat, lon = df_labelloc.loc[region].iloc[:2].values.round(1)
                    if lon > 180:
                        # Was the no-op expression "lon - 360"; the wrapped
                        # longitude is now actually stored.
                        lon -= 360
                    if precur.calc_ts != 'pattern cov':
                        count = rg._df_count[k]
                        text = f'{int(RB[q])}/{count}'
                        temp.append([
                            lon + 10, lat + 5, text, {
                                'fontsize': 15,
                                'bbox': dict(facecolor='white', alpha=0.8)
                            }
                        ])
                    elif precur.calc_ts == 'pattern cov' and q == 0:
                        # One text for the whole pattern, placed at the mean
                        # of the coordinate axes.
                        count = rg._df_count[f'{month}..0..{precur.name}_sp']
                        text = f'{int(RB[0])}/{count}'
                        lon = float(CDlabels[:, i].longitude.mean())
                        lat = float(CDlabels[:, i].latitude.mean())
                        temp.append([
                            lon, lat, text, {
                                'fontsize': 15,
                                'bbox': dict(facecolor='white', alpha=0.8)
                            }
                        ])
                textinmap.append([(i, 0), temp])

        # NOTE(review): only the first two precursors select plot options;
        # for ip >= 2 the options of the previous iteration are reused.
        if ip == 0:
            kwrgs_plot = kwrgs_plotcorr_sst.copy()
        elif ip == 1:
            kwrgs_plot = kwrgs_plotcorr_SM.copy()

        # labels plot
        plot_maps.plot_labels(CDlabels.mean(dim='split'),
                              kwrgs_plot=kwrgs_plot)
        if save:
            if method == 'pcmci':
                dirpath = rg.path_outsub2
            else:
                dirpath = rg.path_outsub1
            plt.savefig(os.path.join(
                dirpath, f'{precur.name}_eps{precur.distance_eps}'
                f'minarea{precur.min_area_in_degrees2}_aCI{alpha_CI}_labels_'
                f'{periodnames[-1]}' + rg.figext),
                        bbox_inches='tight')

        # MCI values plot
        # True where at least one split has a label at that grid cell.
        mask_xr = np.isnan(CDlabels).mean(dim='split') < 1.
        kwrgs_plot.update({
            'clevels': np.arange(-0.8, 0.9, .1),
            'textinmap': textinmap
        })
        fig = plot_maps.plot_corr_maps(MCIstr.where(mask_xr).mean(dim='split'),
                                       mask_xr=mask_xr,
                                       **kwrgs_plot)
        if save:
            # dirpath was set in the "labels plot" save step above.
            fig.savefig(os.path.join(
                dirpath, f'{precur.name}_eps{precur.distance_eps}'
                f'minarea{precur.min_area_in_degrees2}_aCI{alpha_CI}_MCI_'
                f'{periodnames[-1]}' + rg.figext),
                        bbox_inches='tight')
# r = np.meshgrid(xrclusteredall.n_clusters.astype(str).values) r = xrclusteredall.n_clusters.astype(str).values # subtitles = [f'n-clusters={r.flatten()[i]}, ' + f'random state={c.flatten()[i]}' for i in range(c.size)] subtitles = [ f'n-clusters={r.flatten()[i]}, linkage=ward, metric=euclidian' for i in range(r.size) ] fig = plot_maps.plot_labels(xrclusteredall, kwrgs_plot={ 'wspace': .05, 'hspace': .17, 'cbar_vert': .045, 'row_dim': 'n_clusters', 'col_dim': 'linkage', 'zoomregion': selbox, 'cmap': cmp, 'x_ticks': np.array([260, 270, 280]), 'title': title, 'title_fontdict': { 'y': .93, 'fontsize': 18, 'fontweight': 'bold' } }) for i, ax in enumerate(fig.axes[:-1]): np.isnan(xr_States).plot.contour(ax=ax, transform=plot_maps.ccrs.PlateCarree(), linestyles=['solid'], colors=['black'], linewidths=2, levels=[0, 1],