def plot_eofs(self, mean=True, kwrgs: dict=None):
    """Plot the EOF loading patterns with plot_maps.plot_corr_maps.

    Parameters
    ----------
    mean : bool
        When True, the EOFs are averaged over the 'split' dimension
        before plotting.
    kwrgs : dict, optional
        Extra keyword arguments forwarded to ``plot_corr_maps``;
        they override the defaults chosen here.
    """
    plot_settings = {'col_dim': 'eof'}
    if mean:
        eof_patterns = self.eofs.mean(dim='split')
        # widen the panels, but only when the caller supplied no settings
        if kwrgs is None:
            plot_settings['aspect'] = 3
    else:
        eof_patterns = self.eofs
    if kwrgs is not None:
        plot_settings.update(kwrgs)
    plot_maps.plot_corr_maps(eof_patterns, **plot_settings)
def regrid_array(xr_or_filestr, to_grid, periodic=False):
    """Regrid a field to ``to_grid`` resolution, plotting before/after.

    Parameters
    ----------
    xr_or_filestr : str or xr.DataArray
        Path to a netCDF file (loaded lazily) or an already-loaded array.
        If an array is given, it is treated as a label map and plotted
        with ``plot_labels`` instead of ``plot_corr_maps``.
    to_grid : target grid resolution, forwarded to
        ``functions_pp.regrid_xarray``.
    periodic : bool
        Whether the longitude axis is periodic (forwarded).

    Returns
    -------
    xr_regrid : the regridded array.
    """
    import functions_pp
    # isinstance instead of type(...) == str: idiomatic and subclass-safe
    if isinstance(xr_or_filestr, str):
        xarray = core_pp.import_ds_lazy(xr_or_filestr)
        plot_maps.plot_corr_maps(xarray[0])
        xr_regrid = functions_pp.regrid_xarray(xarray, to_grid,
                                               periodic=periodic)
        plot_maps.plot_corr_maps(xr_regrid[0])
    else:
        plot_maps.plot_labels(xr_or_filestr)
        xr_regrid = functions_pp.regrid_xarray(xr_or_filestr, to_grid,
                                               periodic=periodic)
        plot_maps.plot_labels(xr_regrid)
        # quick visual check on a single label (label == 3)
        plot_maps.plot_labels(xr_regrid.where(xr_regrid.values == 3))
    return xr_regrid
def quick_view_labels(self, map_proj=None):
    """Plot the precursor-region label maps of every precursor actor.

    The tab20 colormap is divided over ``min(20, max label)`` steps;
    labels above that cap are clipped to the cap so colors stay
    consistent across subplots.
    """
    for name, actor in self.outdic_precur.items():
        labels = actor.prec_labels.copy()
        # Cap the number of distinct colors at 20 (tab20). Using the
        # global maximum guarantees identical label->color mapping in
        # every panel.
        n_regs_cap = min(20, int(labels.max() + 0.5))
        too_high = np.nan_to_num(labels.values) >= n_regs_cap
        labels.values[too_high] = n_regs_cap
        # shift by half a step so each label falls in the middle of a
        # color bin
        labels.values = labels.values - 0.5
        n_steps = n_regs_cap + 1
        color_levels = np.linspace(0, n_regs_cap, n_steps)
        colormap = plt.cm.tab20
        contour_mask = None
        # single split needs a lower colorbar position
        cbar_vert = -0.1 if labels.split.size == 1 else -0.025
        plot_settings = {'row_dim': 'split',
                         'col_dim': 'lag',
                         'hspace': -0.35,
                         'size': 3,
                         'cbar_vert': cbar_vert,
                         'clevels': color_levels,
                         'subtitles': None,
                         'lat_labels': True,
                         'cticks_center': True,
                         'cmap': colormap}
        plot_maps.plot_corr_maps(labels, contour_mask, map_proj,
                                 **plot_settings)
# Store the output dataframes, then plot CPPA correlation maps and weights.
file_path = os.path.join(ex['path_data_out'], fname)
functions_pp.store_hdf_df(dict_of_dfs, file_path)
#actor.ts_corr[ex['RV_name']] = pd.Series(RV.RVfullts.values, index=actor.ts_corr[0].index)
central_lon_plots = 200
map_proj = ccrs.LambertCylindrical(central_longitude=central_lon_plots)
kwrgs_corr = {'clim': (-0.5, 0.5), 'hspace': -0.6}
# ensure the pdf output folder exists
# NOTE(review): `!= True` is non-idiomatic; prefer `not os.path.isdir(...)`.
# Also pdfs_folder is created but not referenced again in this chunk.
pdfs_folder = os.path.join(ex['path_fig'], 'pdfs')
if os.path.isdir(pdfs_folder) != True:
    os.makedirs(pdfs_folder)
f_format = '.png'
#lags_plot = [0, 20, 50]
lags_to_plot = lags_i
# contour only where precursor regions were detected (label > 0)
contour_mask = (CPPA_prec['prec_labels'] > 0).sel(
    lag=lags_to_plot).astype(bool)
plot_maps.plot_corr_maps(CPPA_prec.sel(lag=lags_to_plot), contour_mask,
                         map_proj, **kwrgs_corr)
# build a filesystem-safe string out of the lag list, e.g. "0_20_50"
lags_str = str(lags_to_plot).replace(' ', '').replace('[', '').replace(
    ']', '').replace(',', '_')
fig_filename = 'CPPA_{}_vs_{}_{}'.format(ex['RV_name'], 'sst',
                                         lags_str) + f_format
plt.savefig(os.path.join(ex['path_fig'], fig_filename), bbox_inches='tight')
# second figure: the (always positive) CPPA weights on a red scale
kwrgs_corr['clim'] = (.8, 1.0)
kwrgs_corr['clevels'] = np.arange(.8, 1 + 1E-9, .025)
kwrgs_corr['cmap'] = plt.cm.Reds
plot_maps.plot_corr_maps(
    CPPA_prec.sel(lag=lags_to_plot)['weights'], contour_mask, map_proj,
    **kwrgs_corr)
#%%
def plot_ss2(agg_level, skillscores, col_wrap, metric=None):
    """Paint verification skill scores onto the cluster map and plot them.

    Parameters
    ----------
    agg_level : aggregation level used to look up the cluster netCDF path.
    skillscores : pd.DataFrame
        MultiIndex rows (levels include test/train and score name),
        columns are target months.
    col_wrap : int
        Number of columns in the facet plot.
    metric : str, optional
        If given, plot only this single '<test_or_train>_<score>' metric.

    Returns
    -------
    fig : the figure produced by plot_maps.plot_corr_maps.
    """
    #%%
    import find_precursors
    cluster_nc_path = get_list_of_name_path(agg_level, 1)[0][1]
    ds = core_pp.import_ds_lazy(cluster_nc_path, format_lon='west_east')
    cluster_labels_org = ds.coords['cluster']
    ds = ds['xrclustered']
    #create list of skill score names
    skillscores_multi_idx = skillscores.index.levels
    ss_list = []
    for i in skillscores_multi_idx[1:][0]:
        for j in skillscores_multi_idx[1:][1]:
            ss_name = '{}_{}'.format(i, j)
            ss_list.append(ss_name)
    if metric is not None:
        #only apply single metric
        ss_list = [metric]
    #add dimensions and coordinates
    xr_score = ds.copy()
    xr_score.attrs = {}
    list_xr = [xr_score.copy().expand_dims('metric', axis=0)
               for m in ss_list]
    xr_score = xr.concat(list_xr, dim='metric')
    xr_score['metric'] = ('metric', ss_list)
    list_xr = [
        xr_score.copy().expand_dims('target_month', axis=0)
        for m in skillscores.columns
    ]
    xr_score = xr.concat(list_xr, dim='target_month')
    xr_score['target_month'] = ('target_month', skillscores.columns)
    #replace labels with skillscores
    for metric_nr, metric in enumerate(xr_score.metric.values):
        # metric has the form '<test_or_train>_<score>'
        test_or_train = metric[:metric.find("_")]
        ss = metric[metric.find("_") + 1:]
        for month_nr, month in enumerate(xr_score.target_month.values):
            #slice over metric, month in skill score df
            metric_cluster_dict = skillscores[month].xs(
                (test_or_train, ss), level=(1, 2)).to_dict()
            #replace cluster_labels with their skill score
            cluster_labels_new = [
                metric_cluster_dict.get(x, x)
                for x in cluster_labels_org.values
            ]
            #set all non replaced values of cluster labels to np.nan
            cluster_labels_new = [
                np.nan if isinstance(x, np.int32) else x
                for x in cluster_labels_new
            ]
            #replace values
            xarr_labels_to_replace = ds
            xr_score[month_nr,
                     metric_nr] = find_precursors.view_or_replace_labels(
                         xarr_labels_to_replace,
                         regions=list(cluster_labels_org.values),
                         replacement_labels=cluster_labels_new)
    #set col wrap and subtitles
    col_wrap = col_wrap  #int determines nr of cols
    import math
    subtitles = [[] for i in range(
        int(math.ceil(xr_score.target_month.values.size / col_wrap)))]
    total_nr_fields = col_wrap * len(subtitles)
    j = -1
    # NOTE(review): `metric` below is the leaked loop variable from the
    # replacement loop above (the last metric) — confirm this is intended.
    for i, month in enumerate(xr_score.target_month.values):
        if i % col_wrap == 0:
            j += 1
        subtitles[j].append('{}, {}'.format(month, metric))
        # pad the last row with dummy '0' subtitles so the grid is full
        if i == max(
                list(enumerate(xr_score.target_month.values))
        )[0] and total_nr_fields > xr_score.target_month.values.size:
            for k in range(total_nr_fields -
                           xr_score.target_month.values.size):
                subtitles[j].append('0')
    #plot
    fig = plot_maps.plot_corr_maps(xr_score,
                                   col_dim='target_month',
                                   row_dim='metric',
                                   size=4,
                                   clevels=np.arange(-.5, 0.51, .1),
                                   cbar_vert=-0.1,
                                   hspace=-0.2,
                                   subtitles=subtitles,
                                   col_wrap=col_wrap)
    #%%
    return fig
# (fragment) tail of a kwrgs_plot dict whose opening brace is in a
# preceding chunk not visible here.
    'subtitles': subtitles,
    'title_fontdict': {
        'y': 1.0,
        'fontsize': 18
    }
}
save = True
# rg.plot_maps_corr(var='z500', save=save,
#                   min_detect_gc=min_detect_gc,
#                   kwrgs_plot=kwrgs_plot,
#                   append_str=''.join(map(str, z500_green_bb))+TV+str(cluster_label))
z500 = rg.list_for_MI[0]
# keep only grid cells significant in enough train/test splits
xrvals, xrmask = RGCPD._get_sign_splits_masked(z500.corr_xr, min_detect_gc,
                                               z500.corr_xr['mask'])
fig = plot_maps.plot_corr_maps(xrvals, xrmask, **kwrgs_plot)
rg.get_clust()
xrclustered = find_precursors.view_or_replace_labels(rg.ds['xrclustered'],
                                                     cluster_label)
# outline the selected target cluster in white on the first panel
fig.axes[0].contour(xrclustered.longitude, xrclustered.latitude,
                    np.isnan(xrclustered), transform=ccrs.PlateCarree(),
                    levels=[0, 2], linewidths=1, linestyles=['solid'],
                    colors=['white'])
filename = os.path.join(rg.path_outsub1,
                        'z500vsmx2t_' + rg.hash + '_' + str(cluster_label))
def plot_regions(rg, save, plot_parcorr=False):
    """Plot conditionally-dependent precursor regions and their MCI/corr.

    For every precursor in ``rg.list_for_MI`` this produces (1) a label
    map of the regions that survived the conditional-independence test
    and (2) a map of their (partial) correlation values, annotated with
    robustness counts ('#robust splits / #total splits').

    Relies on module-level globals: ``alpha_CI``, ``method``,
    ``kwrgs_plotcorr_sst``, ``kwrgs_plotcorr_SM``.

    Fix vs. original: ``if lon > 180: lon - 360`` was a no-op (the
    subtraction result was discarded); it now wraps the longitude.
    """
    # Get ConDepKeys: per month, the keys whose p-value passes alpha_CI,
    # together with their mean correlation and robustness count.
    df_pvals = rg.df_pvals.copy()
    df_corr = rg.df_corr.copy()
    periodnames = list(rg.list_for_MI[0].corr_xr.lag.values)
    CondDepKeys = {}
    for i, mon in enumerate(periodnames):
        list_mon = []
        _keys = [k for k in df_pvals.index if mon in k]  # month
        df_sig = df_pvals[df_pvals.loc[_keys] <= alpha_CI].dropna(
            axis=0, how='all')  # significant
        for k in df_sig.index:
            corr_val = df_corr.loc[k].mean()
            # robustness: in how many splits is the link significant
            RB = (df_pvals.loc[k] < alpha_CI).sum()
            list_mon.append((k, corr_val, RB))
        CondDepKeys[mon] = list_mon

    for ip, precur in enumerate(rg.list_for_MI):
        # ip=0; precur = rg.list_for_MI[ip]
        CDlabels = precur.prec_labels.copy()
        if precur.group_lag:
            # replicate the single (grouped) lag to all period names
            CDlabels = xr.concat([CDlabels] * len(periodnames), dim='lag')
            CDlabels['lag'] = ('lag', periodnames)
            CDcorr = precur.corr_xr_.copy()
        else:
            CDcorr = precur.corr_xr.copy()
        textinmap = []
        MCIstr = CDlabels.copy()
        for i, month in enumerate(CondDepKeys):
            # keys/values/robustness belonging to this precursor only
            CDkeys = [
                k[0] for k in CondDepKeys[month]
                if precur.name in k[0].split('..')[-1]
            ]
            MCIv = [
                k[1] for k in CondDepKeys[month]
                if precur.name in k[0].split('..')[-1]
            ]
            RB = [
                k[2] for k in CondDepKeys[month]
                if precur.name in k[0].split('..')[-1]
            ]
            region_labels = [
                int(l.split('..')[1]) for l in CDkeys
                if precur.name in l.split('..')[-1]
            ]
            f = find_precursors.view_or_replace_labels
            if len(CDkeys) != 0:
                if region_labels[0] == 0:  # pattern cov
                    # label 0 means 'whole pattern': expand to all labels
                    region_labels = np.unique(
                        CDlabels[:, i].values[~np.isnan(CDlabels[:,
                                                                 i]).values])
                    region_labels = np.array(region_labels, dtype=int)
                    MCIv = np.repeat(MCIv, len(region_labels))
                    CDkeys = [
                        CDkeys[0].replace('..0..', f'..{r}..')
                        for r in region_labels
                    ]
            CDlabels[:, i] = f(CDlabels[:, i].copy(), region_labels)
            if plot_parcorr:
                MCIstr[:, i] = f(CDlabels[:, i].copy(),
                                 region_labels,
                                 replacement_labels=MCIv)
            else:
                MCIstr[:, i] = CDcorr[:, i].copy()
            # get text on robustness:
            if len(CDkeys) != 0:
                temp = []
                df_labelloc = find_precursors.labels_to_df(CDlabels[:, i])
                for q, k in enumerate(CDkeys):
                    l = int(k.split('..')[1])
                    if l == 0:  # pattern cov
                        lat, lon = df_labelloc.mean(0)[:2]
                    else:
                        lat, lon = df_labelloc.loc[l].iloc[:2].values.round(1)
                    if lon > 180:
                        lon -= 360  # wrap to west-east convention (was a no-op)
                    if precur.calc_ts != 'pattern cov':
                        count = rg._df_count[k]
                        text = f'{int(RB[q])}/{count}'
                        temp.append([
                            lon + 10, lat + 5, text, {
                                'fontsize': 15,
                                'bbox': dict(facecolor='white', alpha=0.8)
                            }
                        ])
                    elif precur.calc_ts == 'pattern cov' and q == 0:
                        # one annotation for the whole covariance pattern
                        count = rg._df_count[f'{month}..0..{precur.name}_sp']
                        text = f'{int(RB[0])}/{count}'
                        lon = float(CDlabels[:, i].longitude.mean())
                        lat = float(CDlabels[:, i].latitude.mean())
                        temp.append([
                            lon, lat, text, {
                                'fontsize': 15,
                                'bbox': dict(facecolor='white', alpha=0.8)
                            }
                        ])
                textinmap.append([(i, 0), temp])
        if ip == 0:
            kwrgs_plot = kwrgs_plotcorr_sst.copy()
        elif ip == 1:
            kwrgs_plot = kwrgs_plotcorr_SM.copy()
        # labels plot
        plot_maps.plot_labels(CDlabels.mean(dim='split'),
                              kwrgs_plot=kwrgs_plot)
        if save:
            if method == 'pcmci':
                dirpath = rg.path_outsub2
            else:
                dirpath = rg.path_outsub1
            plt.savefig(os.path.join(
                dirpath, f'{precur.name}_eps{precur.distance_eps}'
                f'minarea{precur.min_area_in_degrees2}_aCI{alpha_CI}_labels_'
                f'{periodnames[-1]}' + rg.figext),
                        bbox_inches='tight')
        # MCI values plot
        mask_xr = np.isnan(CDlabels).mean(dim='split') < 1.
        kwrgs_plot.update({
            'clevels': np.arange(-0.8, 0.9, .1),
            'textinmap': textinmap
        })
        fig = plot_maps.plot_corr_maps(MCIstr.where(mask_xr).mean(dim='split'),
                                       mask_xr=mask_xr,
                                       **kwrgs_plot)
        if save:
            fig.savefig(os.path.join(
                dirpath, f'{precur.name}_eps{precur.distance_eps}'
                f'minarea{precur.min_area_in_degrees2}_aCI{alpha_CI}_MCI_'
                f'{periodnames[-1]}' + rg.figext),
                        bbox_inches='tight')
import plot_maps
import cartopy.crs as ccrs ; import matplotlib.pyplot as plt
# rolling-mean window in time steps applied before compositing events
rm = 5
ds_sm = self.ds_seldates.rolling(time=rm).mean()
# select the lagged event dates and relabel the time axis as 'lag'
ds_raw_e = ds_sm.sel(time=np.concatenate(self.event_lagged))
xarray = ds_raw_e.copy().rename({'time': 'lag'})
xarray = xarray.assign_coords(lag=np.concatenate(self.lag_axes))
# standardize by the full-period std -> composites in std-anomaly units
xarray = xarray / self.ds_seldates.std(dim='time')
# mean composite per lag, sampled every 5 steps from -20 to +20
xr_snap = xarray.groupby('lag').mean().sel(lag=np.arange(-20, 21, 5))
kwrgs_plot = {'y_ticks': np.arange(0, 61, 20),
              'map_proj': ccrs.PlateCarree(central_longitude=180),
              'hspace': .2, 'cbar_vert': .05,
              'clevels': np.arange(-.5, .51, .1)}
plot_maps.plot_corr_maps(xr_snap, row_dim='lag', col_dim='split',
                         **kwrgs_plot)
plt.savefig(os.path.join(rg.path_outsub1, f'snapshots_{var}_rm{rm}.pdf'))
#%% Correlation PNA-like RW with Wavenumber 6 phase 2
# only for eastern
import core_pp, find_precursors
values = []
if west_or_east == 'eastern':
    lags_list = range(-10, 10)
    for lag in lags_list:
        selbox = (0, 360, 25, 60)
        # selbox = (140,300,20,73)
        tfreq = 1
        # lag = 0
        dates_RV = core_pp.get_subdates(
            pd.to_datetime(rg.fulltso.time.values),
            start_end_date=rg.start_end_TVdate)
        RV_ts = rg.fulltso.sel(time=dates_RV)
        ds_v300 = core_pp.import_ds_lazy(rg.list_precur_pp[1][1])
except: SST_pp_filepath = user_dir + '/surfdrive/ERA5/input_raw/preprocessed/sst_1979-2020_jan_dec_monthly_1.0deg.nc' if 'df_ENSO' not in globals(): df_PDO, PDO_patterns = climate_indices.PDO(SST_pp_filepath, None) PDO_plot_kwrgs = {'units':'[-]', 'cbar_vert':-.1, # 'zoomregion':(130,260,20,60), 'map_proj':ccrs.PlateCarree(central_longitude=220), 'y_ticks':np.array([25,40,50,60]), 'x_ticks':np.arange(130, 280, 25), 'clevels':np.arange(-.6,.61,.075), 'clabels':np.arange(-.6,.61,.3), 'subtitles':np.array([['PDO loading pattern']])} fig = plot_maps.plot_corr_maps(PDO_patterns[0], **PDO_plot_kwrgs) filepath = os.path.join(path_out_main, 'PDO_pattern') fig.savefig(filepath + '.pdf', bbox_inches='tight') fig.savefig(filepath + '.png', bbox_inches='tight') # summerdates = core_pp.get_subdates(dates, start_end_TVdate) df_PDOsplit = df_PDO.loc[0]#.loc[summerdates] # standardize = preprocessing.StandardScaler() # standardize.fit(df_PDOsplit[df_PDOsplit['TrainIsTrue'].values].values.reshape(-1,1)) # df_PDOsplit = pd.DataFrame(standardize.transform(df_PDOsplit['PDO'].values.reshape(-1,1)), # index=df_PDOsplit.index, columns=['PDO']) df_PDOsplit = df_PDOsplit[['PDO']].apply(standardize_on_train, args=[df_PDO.loc[0]['TrainIsTrue']], result_type='broadcast') # Butter Lowpass
# Adjusted box for Reviewer 1, z500_green_bb = (155,255,20,73) #: RW box subtitles = [['winter (DJF)'], ['spring (MAM)'], ['summer (JJA)']] drawbox = ['all', z500_green_bb] # drawbox = [[(0,i), z500_green_bb] for i in range(len(subtitles))] title = f'$corr(z500_t, T^{west_east.capitalize()[0]}_t)$' kwrgs_plot = {'row_dim':'lag', 'col_dim':'split', 'aspect':3.8, 'size':2.5, 'hspace':0.2, 'cbar_vert':.01, 'units':'Corr. Coeff. [-]', 'zoomregion':(-180,360,0,80), 'drawbox':drawbox, 'map_proj':ccrs.PlateCarree(central_longitude=220), 'y_ticks':np.array([10,30,50,70,90]), 'clim':(-.6,.6), 'title':title, 'subtitles':subtitles, 'title_fontdict':{'y':0.96, 'fontsize':18}} g = plot_maps.plot_corr_maps(corr, mask, **kwrgs_plot) g.fig.savefig(os.path.join(path_output, 'z500_vs_T_seasonal_dependence.jpg'), dpi=300, bbox_inches='tight') g.fig.savefig(os.path.join(path_output, 'z500_vs_T_seasonal_dependence.pdf'), bbox_inches='tight') #%% import matplotlib as mpl mpl.rcParams.update(mpl.rcParamsDefault) for rg in rg_list: rg.get_EOFs() rg.plot_EOFs() #%% import matplotlib as mpl mpl.rcParams.update(mpl.rcParamsDefault)
# (fragment) continues an `if save:` / `if method == 'pcmci':` branch
# from a preceding chunk; indentation below is reconstructed.
        dirpath = rg.path_outsub2
    else:
        dirpath = rg.path_outsub1
    plt.savefig(os.path.join(
        dirpath, f'{precur.name}_eps{precur.distance_eps}'
        f'minarea{precur.min_area_in_degrees2}_aCI{alpha_CI}_labels' +
        rg.figext),
                bbox_inches='tight')
# MCI values plot
kwrgs_plot.update({
    'clevels': np.arange(-0.8, 0.9, .2),
    'textinmap': textinmap
})
# mask grid cells that are NaN in every split
fig = plot_maps.plot_corr_maps(
    MCIstr.mean(dim='split'),
    mask_xr=np.isnan(MCIstr.mean(dim='split')).astype(bool),
    **kwrgs_plot)
if save:
    fig.savefig(os.path.join(
        dirpath, f'{precur.name}_eps{precur.distance_eps}'
        f'minarea{precur.min_area_in_degrees2}_aCI{alpha_CI}_MCI' +
        rg.figext),
                bbox_inches='tight')
#%%
from sklearn.linear_model import RidgeCV
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegressionCV
from stat_models_cont import ScikitModel
# fcmodel = ScikitModel(RandomForestRegressor, verbosity=0)
def pipeline(lags, periodnames, load=False):
    """Run the soil-moisture vs. z500 circulation analysis pipeline.

    Loads (or computes) the SM precursor, then for each period computes
    z500 correlation maps against the SM pattern time series, merges SM
    and z500 maps into one figure and stores it.

    Parameters
    ----------
    lags : list of [start, end] date-string pairs ('yyyy-mm-dd').
    periodnames : list of short period labels, e.g. ['MA', 'AM', ...].
    load : False, 'maps' or 'all' — whether to load cached z500 maps.

    Relies on module-level globals (filename_smi_pp, alpha_corr, USBox,
    pathoutfull, target_dataset, path_raw, path_out_main, seed,
    subfoldername, min_detect, ...).
    """
    #%%
    method = False
    # Shift the SM lag windows: the start date takes the year of the end
    # date (SM is always sampled within the target year).
    SM_lags = lags.copy()
    for i, l in enumerate(SM_lags):
        orig = '-'.join(l[0].split('-')[:-1])
        repl = '-'.join(l[1].split('-')[:-1])
        SM_lags[i] = [l[0].replace(orig, repl), l[1]]
    SM = BivariateMI(name='smi',
                     filepath=filename_smi_pp,
                     func=class_BivariateMI.corr_map,
                     alpha=alpha_corr,
                     FDR_control=True,
                     kwrgs_func={},
                     distance_eps=250,
                     min_area_in_degrees2=3,
                     calc_ts='pattern cov',
                     selbox=USBox,
                     lags=SM_lags,
                     use_coef_wghts=True)
    load_SM = '{}_a{}_{}_{}_{}'.format(SM._name, SM.alpha, SM.distance_eps,
                                       SM.min_area_in_degrees2,
                                       periodnames[-1])
    loaded = SM.load_files(pathoutfull, load_SM)
    SM.prec_labels['lag'] = ('lag', periodnames)
    SM.corr_xr['lag'] = ('lag', periodnames)
    # SM.get_prec_ts(kwrgs_load={})
    # df_SM = pd.concat(SM.ts_corr, keys=range(len(SM.ts_corr)))
    TVpath = os.path.join(pathoutfull, f'df_output_{periodnames[-1]}.h5')
    z500_maps = []
    for i, periodname in enumerate(periodnames):
        lag = np.array(lags[i])
        # detect windows that cross a calendar-year boundary
        _yrs = [int(l.split('-')[0]) for l in lag]
        if np.unique(_yrs).size > 1:  # crossing year
            crossyr = True
        else:
            crossyr = False
        start_end_TVdate = ('-'.join(lag[0].split('-')[1:]),
                            '-'.join(lag[1].split('-')[1:]))
        lag = np.array([start_end_TVdate])
        list_for_MI = [
            BivariateMI(name='z500',
                        func=class_BivariateMI.corr_map,
                        alpha=alpha_corr,
                        FDR_control=True,
                        kwrgs_func={},
                        distance_eps=250,
                        min_area_in_degrees2=3,
                        calc_ts='pattern cov',
                        selbox=(155, 355, 10, 80),
                        lags=lag,
                        group_split=True,
                        use_coef_wghts=True)
        ]
        name_ds = f'{periodname}..0..{target_dataset}_sp'
        list_of_name_path = [('', TVpath),
                             ('z500',
                              os.path.join(
                                  path_raw,
                                  'z500_1950-2019_1_12_monthly_1.0deg.nc'))]
        start_end_year = (1951, 2019)
        # when the window crosses new-year, the target year starts later
        if crossyr:
            TV_start_end_year = (start_end_year[0] + 1, 2019)
        else:
            TV_start_end_year = (start_end_year[0], 2019)
        kwrgs_core_pp_time = {'start_end_year': TV_start_end_year}
        rg = RGCPD(list_of_name_path=list_of_name_path,
                   list_for_MI=list_for_MI,
                   list_import_ts=None,
                   start_end_TVdate=start_end_TVdate,
                   start_end_date=None,
                   start_end_year=start_end_year,
                   tfreq=None,
                   path_outmain=path_out_main)
        rg.figext = '.png'
        rg.pp_precursors(detrend=[True, {
            'tp': False,
            'smi': False
        }],
                         anomaly=[True, {
                             'tp': False,
                             'smi': False
                         }])
        # detrending done prior in clustering_soybean
        rg.pp_TV(name_ds=name_ds,
                 detrend=False,
                 ext_annual_to_mon=False,
                 kwrgs_core_pp_time=kwrgs_core_pp_time)
        # if method.split('_')[0]=='leave':
        #     rg.traintest(method, gap_prior=1, gap_after=1, seed=seed,
        #                  subfoldername=subfoldername)
        # else:
        rg.traintest(method, seed=seed, subfoldername=subfoldername)
        z500 = rg.list_for_MI[0]
        path_circ = os.path.join(rg.path_outsub1, 'circulation')
        os.makedirs(path_circ, exist_ok=True)
        load_z500 = '{}_a{}_{}_{}_{}'.format(z500._name, z500.alpha,
                                             z500.distance_eps,
                                             z500.min_area_in_degrees2,
                                             periodnames[-1])
        if load == 'maps' or load == 'all':
            loaded = z500.load_files(path_circ, load_z500)
        else:
            loaded = False
        if hasattr(z500, 'corr_xr') == False:
            rg.calc_corr_maps('z500')
        # store forecast month
        months = {
            'JJ': 'August',
            'MJ': 'July',
            'AM': 'June',
            'MA': 'May',
            'FM': 'April',
            'JF': 'March',
            'SO': 'December',
            'DJ': 'February'
        }
        rg.fc_month = months[periodnames[-1]]
        z500_maps.append(z500.corr_xr)
        if loaded == False:
            z500.store_netcdf(path_circ, load_z500, add_hash=False)
    z500_maps = xr.concat(z500_maps, dim='lag')
    z500_maps['lag'] = ('lag', periodnames)
    #%%
    # merge maps
    xr_merge = xr.concat(
        [SM.corr_xr.mean('split'),
         z500_maps.drop_vars('split').squeeze()],
        dim='var')
    xr_merge['var'] = ('var', ['SM', 'z500'])
    xr_merge = xr_merge.sel(lag=periodnames[::-1])
    # get mask
    maskSM = RGCPD._get_sign_splits_masked(SM.corr_xr,
                                           min_detect=.1,
                                           mask=SM.corr_xr['mask'])[1]
    xr_mask = xr.concat(
        [maskSM, z500_maps['mask'].drop_vars('split').squeeze()], dim='var')
    xr_mask['var'] = ('var', ['SM', 'z500'])
    xr_mask = xr_mask.sel(lag=periodnames[::-1])
    month_d = {
        'AS': 'Aug-Sep mean',
        'JJ': 'July-June mean',
        'JA': 'July-June mean',
        'MJ': 'May-June mean',
        'AM': 'Apr-May mean',
        'MA': 'Mar-Apr mean',
        'FM': 'Feb-Mar mean',
        'JF': 'Jan-Feb mean',
        'DJ': 'Dec-Jan mean',
        'ND': 'Nov-Dec mean',
        'ON': 'Oct-Nov mean',
        'SO': 'Sep-Oct mean'
    }
    subtitles = np.array([month_d[l] for l in xr_merge.lag.values],
                         dtype='object')[::-1]
    subtitles = np.array([[s + ' SM vs yield' for s in subtitles[::-1]],
                          [s + ' z500 vs SM' for s in subtitles[::-1]]])
    # leadtime = intmon_d[rg.fc_month]
    # subtitles = [subtitles[i-1]+f' ({leadtime+i*2-1}-month lag)' for i in range(1,5)]
    kwrgs_plot = {
        'zoomregion': (170, 355, 15, 80),
        'hspace': -.1,
        'cbar_vert': .05,
        'subtitles': subtitles,
        'clevels': np.arange(-0.8, 0.9, .1),
        'clabels': np.arange(-.8, .9, .2),
        'units': 'Correlation',
        'y_ticks': np.arange(15, 75, 15),
        'x_ticks': np.arange(150, 310, 30)
    }
    fg = plot_maps.plot_corr_maps(xr_merge,
                                  xr_mask,
                                  col_dim='lag',
                                  row_dim='var',
                                  **kwrgs_plot)
    facecolorocean = '#caf0f8'
    facecolorland = 'white'
    # color land/ocean on every map axis (last axis is the colorbar)
    for ax in fg.fig.axes[:-1]:
        ax.add_feature(plot_maps.cfeature.__dict__['LAND'],
                       facecolor=facecolorland,
                       zorder=0)
        ax.add_feature(plot_maps.cfeature.__dict__['OCEAN'],
                       facecolor=facecolorocean,
                       zorder=0)
    fg.fig.savefig(os.path.join(path_circ,
                                f'SM_vs_circ_{rg.fc_month}' + rg.figext),
                   bbox_inches='tight')
    # #%%
    # if hasattr(sst, 'prec_labels')==False and 'sst' in use_vars:
    #     rg.cluster_list_MI('sst')
    #     sst.group_small_cluster(distance_eps_sc=2000, eps_corr=0.4)
    #     sst.prec_labels['lag'] = ('lag', periodnames)
    #     sst.corr_xr['lag'] = ('lag', periodnames)
    #     rg.quick_view_labels('sst', min_detect_gc=.5, save=save,
    #                          append_str=periodnames[-1])
    #     plt.close()
    #%%
    return rg
def calculate_corr_maps(ex, map_proj):
    """Compute correlation maps for all precursor variables vs. the RV.

    For every variable in ``ex['vars']`` (except the response variable
    itself) this imports the preprocessed field, correlates it with the
    response variable per train/test split, clusters significant grid
    cells into regions with DBSCAN, and optionally plots each map.

    Parameters
    ----------
    ex : dict
        Experiment settings/state; mutated in place (time_cycle,
        time_range_all, traintest info) and returned.
    map_proj : cartopy projection used for the plots.

    Returns
    -------
    (ex, outdic_actors) : the updated settings dict and a dict of
        per-variable `act` objects holding corr maps and grids.
    """
    #%%
    # =============================================================================
    # Load 'exp' dictionairy with information of pre-processed data (variables, paths, filenames, etcetera..)
    # and add RGCPD/Tigrimate experiment settings
    # =============================================================================
    # Response Variable is what we want to predict
    RV = ex[ex['RV_name']]
    ex['time_cycle'] = RV.dates[
        RV.dates.year ==
        RV.startyear].size  # time-cycle of data. total timesteps in one year
    ex['time_range_all'] = [0, RV.dates.size]
    #==================================================================================
    # Start of experiment
    #==================================================================================

    # Define traintest:
    df_RVfullts = pd.DataFrame(RV.RVfullts.values,
                               index=pd.to_datetime(RV.RVfullts.time.values))
    df_RV_ts = pd.DataFrame(RV.RV_ts.values,
                            index=pd.to_datetime(RV.RV_ts.time.values))
    if ex['method'][:9] == 'ran_strat':
        kwrgs_events = ex['kwrgs_events']
        RV = func_fc.RV_class(df_RVfullts, df_RV_ts, kwrgs_events)
    else:
        RV = func_fc.RV_class(df_RVfullts, df_RV_ts)
    if ex['import_prec_ts']:
        # Retrieve same train test split as imported ts
        path_data = ''.join(ex['precursor_ts'][0][1])
        df_splits = func_fc.load_hdf5(
            path_data)['df_data'].loc[:, ['TrainIsTrue', 'RV_mask']]
        test_yrs = functions_pp.get_testyrs(df_splits)
        df_splits, ex = functions_pp.rand_traintest_years(RV, ex, test_yrs)
        assert (np.equal(test_yrs, ex['tested_yrs'])).all(), \
            "Train test split not equal"
    else:
        df_splits, ex = functions_pp.rand_traintest_years(RV, ex)
    # =============================================================================
    # 2) DEFINE PRECURSOS COMMUNITIES:
    # =============================================================================
    # - calculate and plot pattern correltion for differnt fields
    # - create time-series over these regions
    #=====================================================================================
    outdic_actors = dict()

    class act:
        # Lightweight container for per-variable correlation results.
        def __init__(self, name, corr_xr, precur_arr):
            # FIXME(review): assigns the enclosing-scope `var` instead of
            # the `name` parameter. Works because act() is always called
            # with `var`, but `self.name = name` is what was intended.
            self.name = var
            self.corr_xr = corr_xr
            self.precur_arr = precur_arr
            self.lat_grid = precur_arr.latitude.values
            self.lon_grid = precur_arr.longitude.values
            self.area_grid = rgcpd.get_area(precur_arr)
            # grid resolution in degrees, from longitude spacing
            self.grid_res = abs(self.lon_grid[1] - self.lon_grid[0])

    allvar = ex['vars'][0]  # list of all variable names
    for var in allvar[ex['excludeRV']:]:  # loop over all variables
        actor = ex[var]
        #===========================================
        # 3c) Precursor field
        #===========================================
        file_path = os.path.join(actor.path_pp, actor.filename_pp)
        precur_arr = functions_pp.import_ds_timemeanbins(file_path, ex)
        # precur_arr = rgcpd.convert_longitude(precur_arr, 'only_east')
        # =============================================================================
        # Calculate correlation
        # =============================================================================
        corr_xr = rgcpd.calc_corr_coeffs_new(precur_arr, RV, ex)
        # =============================================================================
        # Cluster into precursor regions
        # =============================================================================
        actor = act(var, corr_xr, precur_arr)
        actor, ex = rgcpd.cluster_DBSCAN_regions(actor, ex)
        if np.isnan(actor.prec_labels.values).all() == False:
            rgcpd.plot_regs_xarray(actor.prec_labels.copy(), ex)
        outdic_actors[var] = actor
        # =============================================================================
        # Plot
        # =============================================================================
        if ex['plotin1fig'] == False:
            plot_maps.plot_corr_maps(corr_xr, corr_xr['mask'], map_proj)
            fig_filename = '{}_corr_{}_vs_{}'.format(
                ex['params'], ex['RV_name'], var) + ex['file_type2']
            plt.savefig(os.path.join(ex['fig_path'], fig_filename),
                        bbox_inches='tight',
                        dpi=200)
            if ex['showplot'] == False:
                plt.close()
    #%%
    return ex, outdic_actors
title = r'$corr(SST_{}, T^{}_t)$'.format('{'+f't-{corlags[0]}'+'}', target[0].capitalize()) # title = '$corr(SST_{'+f't-{corlags[0]}'+'}, T^{}_t)$'.format(target[0].capitalize()) kwrgs_plot = {'aspect':2, 'hspace':.35, 'wspace':-.32, 'size':2, 'cbar_vert':-0.1, 'units':'Corr. Coeff. [-]', 'map_proj':ccrs.PlateCarree(central_longitude=220), 'clevels':np.arange(-.6,.61,.075), 'clabels':np.arange(-.6,.61,.3), 'y_ticks':False, 'x_ticks':False, 'subtitles':subtitles, 'title':title, 'title_fontdict':{'y':1.2, 'fontsize':20}} fig = plot_maps.plot_corr_maps(corr, mask_xr=corr.mask, col_dim='months', row_dim=corr.dims[1], **kwrgs_plot) fig_path = os.path.join(rg.path_outsub1, f_name+'hor')+rg.figext plt.savefig(fig_path, bbox_inches='tight') # %% # vertical plot f_name = 'corr_{}_a{}'.format(precur.name, precur.alpha) + '_' + \ f'{experiment}_lag{corlags}_' + \ f'tf{precur_aggr}_{method}_gc{min_detect_gc}' subtitles = np.array([['$t=$'+k+' mean' for k in monthkeys]]) subtitles = subtitles.reshape(-1,1) kwrgs_plot = {'aspect':2, 'hspace':.3, 'wspace':-.4, 'size':2, 'cbar_vert':0.06, 'units':'Corr. Coeff. [-]', 'map_proj':ccrs.PlateCarree(central_longitude=220),
# (fragment) tail of a kwrgs_plot dict begun in a preceding chunk, then
# the horizontal plot and the start of the vertical variant. This chunk
# also ends mid-dict (truncated).
    'map_proj': ccrs.PlateCarree(central_longitude=220),
    'clevels': np.arange(-.6, .61, .075),
    'clabels': np.arange(-.6, .61, .3),
    'y_ticks': False,
    'x_ticks': False,
    'subtitles': subtitles,
    'title': title,
    'title_fontdict': {
        'y': 1.2,
        'fontsize': 20
    }
}
fig = plot_maps.plot_corr_maps(corr,
                               mask_xr=corr.mask,
                               col_dim='months',
                               row_dim=corr.dims[1],
                               **kwrgs_plot)
fig_path = os.path.join(rg.path_outsub1, f_name + 'hor') + rg.figext
plt.savefig(fig_path, bbox_inches='tight')
# %%
# vertical plot
f_name = 'corr_{}_a{}'.format(precur.name, precur.alpha) + '_' + \
         f'{experiment}_lag{corlags}_' + \
         f'tf{precur_aggr}_{method}_gc{min_detect_gc}'
subtitles = np.array([['$t=$' + k + ' mean' for k in monthkeys]])
subtitles = subtitles.reshape(-1, 1)
kwrgs_plot = {
    'aspect': 2,
    'hspace': .3,
# (fragment) tail of a domain == 'CA' branch begun in a preceding chunk;
# indentation below is reconstructed.
                          xarray.values == Country.CA)
elif domain == 'US':
    mask_US_CA = xarray.values == Country.US
# xr_mask = xarray.where(mask_US_CA)
# erode the country mask to drop coastal/border grid cells
xr_mask = xarray.where(make_country_mask.binary_erosion(mask_US_CA))
# xr_mask = xarray.where(make_country_mask.binary_erosion(np.nan_to_num(xr_mask)))
xr_mask.values[~np.isnan(xr_mask)] = 1
xr_mask = find_precursors.xrmask_by_latlon(xr_mask, upper_right=(270, 63))
# mask small Western US Island
xr_mask = find_precursors.xrmask_by_latlon(xr_mask, bottom_left=(228, 58))
# add Rocky mask: exclude terrain above max_height (geopotential / g)
geo_surf_height = core_pp.import_ds_lazy(
    orography, var='z_NON_CDM', selbox=selbox) / 9.81
geo_surf_height = geo_surf_height.drop('time').drop('realization')
plot_maps.plot_corr_maps(geo_surf_height,
                         cmap=plt.cm.Oranges,
                         clevels=np.arange(0, 2600, 500))
max_height = 1500
mask_Rockies = geo_surf_height < max_height
plot_maps.plot_labels(mask_Rockies)
xr_mask = xr_mask.where(mask_Rockies)
plot_maps.plot_labels(xr_mask)

# In[9]:
# =============================================================================
# Clustering co-occurence of anomalies different tfreqs
# =============================================================================
q = 66
tfreq = [5, 10, 15, 30]
n_clusters = [4, 5, 6, 7, 8, 9, 10]
def ENSO_34(filepath, df_splits=None, get_ENSO_states: bool = True):
    """Compute the Nino-3.4 index and (optionally) yearly ENSO states.

    Parameters
    ----------
    filepath : str
        Path to a (preprocessed) SST netCDF file.
    df_splits : pd.DataFrame, optional
        Train/test split dataframe with a MultiIndex whose first level
        enumerates splits; the index is replicated over splits.
    get_ENSO_states : bool
        If True, also classify each year as Nina(-1)/Neutral(0)/Nino(1)
        following Anderson (2017), derive EN/LN life-cycle labels, plot
        composite SST maps per state and return the state dataframe.

    Returns
    -------
    df_ENSO  — or, when get_ENSO_states:
    (df_ENSO, [nina_yrs, neutral_yrs, nino_yrs], df_state)
    """
    #%%
    # file_path = '/Users/semvijverberg/surfdrive/Data_era5/input_raw/sst_1979-2018_1_12_daily_2.5deg.nc'
    '''
    See http://www.cgd.ucar.edu/staff/cdeser/docs/deser.sstvariability.annrevmarsci10.pdf
    selbox has format of (lon_min, lon_max, lat_min, lat_max)
    '''
    # if df_splits is None:
    #     seldates = None
    # else:
    #     seldates = df_splits.loc[0].index
    # {'la_min':-5, # select domain in degrees east
    #  'la_max':5,
    #  'lo_min':-170,
    #  'lo_max':-120},
    kwrgs_pp = {
        'selbox': (190, 240, -5, 5),  # Nino-3.4 box in only-east degrees
        'format_lon': 'only_east',
        'seldates': None
    }
    ds = core_pp.import_ds_lazy(filepath, **kwrgs_pp)
    dates = pd.to_datetime(ds.time.values)
    # area-weighted spatial mean over the Nino-3.4 box
    data = functions_pp.area_weighted(ds).mean(dim=('latitude', 'longitude'))
    df_ENSO = pd.DataFrame(data=data.values, index=dates, columns=['ENSO34'])
    if df_splits is not None:
        # replicate the series for every train/test split
        splits = df_splits.index.levels[0]
        df_ENSO = pd.concat([df_ENSO] * splits.size, axis=0, keys=splits)
    if get_ENSO_states:
        '''
        From Anderson 2017 - Life cycles of agriculturally relevant ENSO
        teleconnections in North and South America.
        http://doi.wiley.com/10.1002/joc.4916
        mean boreal wintertime (October, November, December) SST anomaly
        amplitude in the Niño 3.4 region exceeded 1 of 2 standard deviation.
        '''
        if hasattr(df_ENSO.index, 'levels'):
            df_ENSO_s = df_ENSO.loc[0]  # states identical across splits
        else:
            df_ENSO_s = df_ENSO
        dates = df_ENSO_s.index
        df_3monthmean = df_ENSO_s.rolling(3, center=True,
                                          min_periods=1).mean()
        std_ENSO = df_3monthmean.std()
        # OND mean per year decides the state of that year
        OND, groups = core_pp.get_subdates(dates,
                                           start_end_date=('10-01', '12-31'),
                                           returngroups=True)
        OND_ENSO = df_3monthmean.loc[OND].groupby(groups).mean()
        nino_yrs = OND_ENSO[OND_ENSO > df_3monthmean.mean() +
                            std_ENSO][:].dropna().index  #+ 1
        nina_yrs = OND_ENSO[OND_ENSO < df_3monthmean.mean() -
                            std_ENSO][:].dropna().index  #+ 1
        neutral = [
            y for y in OND_ENSO.index
            if y not in core_pp.flatten([nina_yrs, nino_yrs])
        ]
        # yearly state: -1 Nina, 0 neutral, 1 Nino
        states = {}
        for i, d in enumerate(dates):
            if d.year in nina_yrs:
                states[d.year] = -1
            if d.year in neutral:
                states[d.year] = 0
            if d.year in nino_yrs:
                states[d.year] = 1

        # life-cycle labels: year 0 of an event plus the years directly
        # before (-1) and after (+1), separately for EN and LN
        cycle_list = []
        for s, v in [('EN', 1), ('LN', -1)]:
            ENSO_cycle = {d.year: 0 for d in dates}
            for i, year in enumerate(np.unique(dates.year)):
                # d = dates[1]
                # if states[year] == v:
                #     s = 'EN'
                # elif states[year] == -1:
                #     s = 'LN'
                if states[year] == v:
                    ENSO_cycle[year] = f'{s}0'
                    if year - 1 in dates.year and states[year - 1] != v:
                        ENSO_cycle[year - 1] = f'{s}-1'
                    if year + 1 in dates.year and states[year + 1] != v:
                        ENSO_cycle[year + 1] = f'{s}+1'
            cycle_list.append(ENSO_cycle)
        time_index = pd.to_datetime([f'{y}-01-01' for y in states.keys()])
        df_state = pd.concat([
            pd.Series(states),
            pd.Series(cycle_list[0]),
            pd.Series(cycle_list[1])
        ],
                             axis=1,
                             keys=['state', 'EN_cycle', 'LN_cycle'])
        df_state.index = time_index
        if hasattr(df_ENSO.index, 'levels'):
            # copy to other traintest splits
            df_state = pd.concat([df_state] * splits.size, keys=splits)
        # composite SST maps for the three states
        composites = np.zeros(3, dtype=object)
        for i, yrs in enumerate([nina_yrs, neutral, nino_yrs]):
            composite = [d for d in dates if d.year in yrs]
            composites[i] = ds.sel(time=composite).mean(dim='time')
        composites = xr.concat(composites, dim='state')
        composites['state'] = ['Nina', 'Neutral', 'Nino']
        plot_maps.plot_corr_maps(composites, row_dim='state', hspace=0.5)
        out = df_ENSO, [
            np.array(nina_yrs),
            np.array(neutral),
            np.array(nino_yrs)
        ], df_state
    else:
        out = df_ENSO
    #%%
    return out
# (fragment) tail of a scatter specification begun in a preceding chunk;
# the chunk also ends mid-call (truncated contour arguments).
        'edgecolors': 'black'
    }]]]
hspace = -.5
cbar_vert = .14
subtitles = np.array([point_corr.points]).reshape(-1, col_wrap)
# scatter = None
fig = plot_maps.plot_corr_maps(point_corr,
                               mask_xr=point_corr['mask'],
                               col_dim='points',
                               aspect=1.5,
                               hspace=hspace,
                               cbar_vert=cbar_vert,
                               subtitles=subtitles,
                               scatter=scatter,
                               col_wrap=col_wrap,
                               x_ticks=np.arange(235, 310, 15),
                               y_ticks=np.arange(0, 61, 10),
                               wspace=.07,
                               clevels=np.arange(-1, 1.05, .1),
                               cmap=plt.cm.coolwarm,
                               zoomregion=selbox)
# draw the cluster outline on every map axis (last axis is the colorbar)
for ax in fig.axes[:-1]:
    ax.contour(mask_cl.longitude,
               mask_cl.latitude,
               mask_cl,
               transform=ccrs.PlateCarree(),
               levels=[0, 2],
               linewidths=1,
               linestyles=['solid'],
print('La Nina yrs', list(cycle[cycle=='LN0'].dropna().index.year)) #%% Composites of Anderson 2017 ENSO states for title in ['EN-1', 'EN0', 'EN+1', 'LN-1', 'LN0', 'LN+1']: cycle = df_states[[f'{title[:2]}_cycle']].loc[0] selyrs = cycle[cycle==title].dropna().index.year # print(title, selyrs) kwrgs = {'hspace':0.2, 'aspect':4, 'cbar_vert':0.04, 'clevels':np.arange(-.75, .76, .25), 'title_fontdict':{'y':.95}, 'y_ticks':False, 'x_ticks':False} comp = [d for d in pd.to_datetime(season.time.values) if d.year in selyrs] ds_plot = season.sel(time=pd.to_datetime(comp)).groupby('time.month').mean() ds_plot = ds_plot.rename({'month':'season'}) ds_plot['season'] = ['DJF', 'MAM', 'JJA', 'SON'] plot_maps.plot_corr_maps(ds_plot, row_dim='season', title=title+f' (n={selyrs.size})', **kwrgs) #%% Composites of SST during Low yield lowyield = rg.dates_TV[(rg.TV_ts < rg.TV_ts.quantile(.33)).values].year comp = [d for d in pd.to_datetime(season.time.values) if d.year in lowyield] ds_plot = season.sel(time=pd.to_datetime(comp)).groupby('time.month').mean() ds_plot = ds_plot.rename({'month':'season'}) ds_plot['season'] = ['DJF', 'MAM', 'JJA', 'SON'] plot_maps.plot_corr_maps(ds_plot, row_dim='season', title='low yield', **kwrgs) # low_prior = lowyield - 1 # lwyield = [d for d in pd.to_datetime(season.time.values) if d.year in low_prior] # plot_maps.plot_corr_maps(season.sel(time=pd.to_datetime(lwyield)).groupby('time.month').mean(), # row_dim='month', title='low yield prior', **kwrgs)
def spatial_valid(var_filename, mask, y_pred_all, y_pred_c, lags_i=None,
                  seldates=None, clusters=None, kwrgs_events=None, alpha=0.05,
                  n_boot=0, blocksize=10, threshold_pred='upper_clim'):
    '''
    Compute per-gridcell verification metrics of the predictions and plot
    them as maps, one figure per cluster.

    Parameters
    ----------
    var_filename : str
        Path to a 3d netcdf file with only one variable.
    mask : str or list
        nc file containing only a mask, or a latlon box in format
        [west_lon, east_lon, south_lat, north_lat] in common west-east degrees.
    y_pred_all : pd.DataFrame
        Predictions per lag (columns are the lags).
    y_pred_c : pd.DataFrame
        Climatological (reference) prediction; its index defines the
        temporal frequency and the dates to verify.
    lags_i : list, optional
        Lags (column keys of y_pred_all) to verify; defaults to all columns.
    seldates : pd.DatetimeIndex, optional
        Daily dates defining the start/end of the verification window;
        derived from y_pred_c.index when None.
    clusters : int or list, optional
        Cluster label(s) in the mask to verify; defaults to all labels.
    kwrgs_events : dict, optional
        Arguments defining the event timeseries; defaults to
        {'event_percentile': 66}.
    alpha, n_boot, blocksize, threshold_pred
        Passed through to valid.get_metrics_sklearn.
    '''
    # Fix: the var_filename and mask arguments were previously overwritten
    # here with hard-coded local debug paths, making both parameters dead.
    if lags_i is None:
        lags_i = list(y_pred_all.columns)
    # load in daily xarray and mask
    xarray = core_pp.import_ds_lazy(var_filename)
    npmask = cl.get_spatial_ma(var_filename, mask)
    # temporal frequency (in days) of the aggregated predictions
    freq = (y_pred_c.index[1] - y_pred_c.index[0]).days
    if seldates is None:
        seldates = aggr_to_daily_dates(y_pred_c.index)
    start = f'{seldates[0].month}-{seldates[0].day}'
    end = f'{seldates[-1].month}-{seldates[-1].day}'
    start_end_date = (start, end)
    xarray, dates = functions_pp.time_mean_bins(xarray, to_freq=freq,
                                                start_end_date=start_end_date)
    # if switching to event timeseries:
    if kwrgs_events is None:
        kwrgs_events = {'event_percentile': 66}
    # unpack other optional arguments for defining the event timeseries
    kwrgs = {key: item for key, item in kwrgs_events.items()
             if key != 'event_percentile'}
    if clusters is None:
        clusters = list(np.unique(npmask[~np.isnan(npmask)]))
    elif isinstance(clusters, int):
        clusters = [clusters]

    dict_allclus = {}
    for clus in clusters:
        # All (lat, lon) index pairs belonging to this cluster.
        lat_idx, lon_idx = np.where(npmask == clus)
        latloni = list(zip(lat_idx, lon_idx))
        futures = {}
        with ProcessPoolExecutor(max_workers=max_cpu) as pool:
            for ll in latloni:
                xr_gridcell = xarray.isel(latitude=ll[0]).isel(longitude=ll[1])
                threshold = func_fc.Ev_threshold(
                    xr_gridcell, kwrgs_events['event_percentile'])
                y_i = func_fc.Ev_timeseries(xr_gridcell, threshold, **kwrgs)[0]
                futures[ll] = pool.submit(valid.get_metrics_sklearn,
                                          y_i.values,
                                          y_pred_all[lags_i],
                                          y_pred_c,
                                          alpha=alpha,
                                          n_boot=n_boot,
                                          blocksize=blocksize,
                                          threshold_pred=threshold_pred)
        # Collect once all workers for this cluster are done.
        dict_allclus[clus] = {key: future.result()
                              for key, future in futures.items()}

    # Metric names are shared across gridcells; read them from any result.
    df_valid = dict_allclus[clus][ll][0]
    metrics = np.unique(df_valid.index.get_level_values(0))
    lags_tf = [l * freq for l in lags_i]
    if freq != 1:
        # the last day of the time mean bin is tfreq/2 later then the
        # centerered day
        lags_tf = [l_tf - int(freq / 2) if l_tf != 0 else 0
                   for l_tf in lags_tf]

    for clus in clusters:
        xroutput = xarray.isel(time=lags_i).rename({'time': 'lag'})
        xroutput['lag'] = lags_tf
        xroutput = xroutput.expand_dims({'metric': metrics}, 0)
        npdata = np.array(np.zeros_like(xroutput), dtype='float32')
        # Fix: iterate this cluster's own gridcells (the keys of its results)
        # instead of the leftover latloni list from the last cluster.
        for ll, result in dict_allclus[clus].items():
            df_valid = result[0]
            for i, met in enumerate(metrics):
                lat_i, lon_i = ll
                # NOTE(review): double .loc[met] kept from the original —
                # presumably selects a MultiIndex level twice; verify.
                npdata[i, :, lat_i, lon_i] = df_valid.loc[met].loc[met]
        xroutput.values = npdata
        plot_maps.plot_corr_maps(xroutput.where(npmask == clus),
                                 row_dim='metric', size=4,
                                 clevels=np.arange(-1, 1.1, 0.2))
        BSS = xroutput.where(npmask == clus).sel(metric='BSS')
        plot_maps.plot_corr_maps(BSS, row_dim='metric', size=4,
                                 clevels=np.arange(-0.25, 0.251, 0.05),
                                 cbar_vert=-0.1)
# Standardize the spatial-mean timeseries (zero mean, unit variance) and plot.
ts_std = (ts - ts.mean()) / ts.std()
ts_std.plot()
#%%
# Standardize each gridcell over time, then average the anomalies spatially.
ds_std = (ds - ds.mean(dim='time')) / ds.std(dim='time')
ts_ds_std = ds_std.mean(dim=('latitude', 'longitude'))
#%%
# Overlay both standardized series in a single figure for comparison.
f, ax = plt.subplots()
ts_ds_std.plot(ax=ax, c='blue')
ts_std.plot(ax=ax)
#%%
# Persist the raw spatial-mean timeseries to netcdf.
cl.store_netcdf(
    ts,
    filepath='/Users/semvijverberg/surfdrive/VU_Amsterdam/GDHY_MIRCA2000_Soy/USDA/usda_maize_spatial_mean_ts.nc')
#%%
# Keep only gridcells that have data at every timestep, then store the
# spatial mean over those always-valid cells.
allways_data_mask = np.isnan(ds).mean(dim='time') == 0
ts_mask = ds.where(allways_data_mask).mean(dim=('latitude', 'longitude'))
cl.store_netcdf(
    ts_mask,
    filepath='/Users/semvijverberg/Dropbox/VIDI_Coumou/Paper3_Sem/GDHY_MIRCA2000_Soy/USDA/usda_soy_spatial_mean_ts_allways_data.nc')
#%%
# Quick-look maps of anomalies at a handful of timesteps.
ano = ds - ds.mean(dim='time')
plot_maps.plot_corr_maps(ano.isel(time=range(0, 40, 5)),
                         row_dim='time', cbar_vert=.09)