import find_precursors

# States that never receive a score in the maps below.
SKIP_STATES = ['NEW YORK']
months = ['August', 'July', 'June']
metrics = ['corrcoef', 'r2_score']  # 'MAE', 'RMSE',

# Start from an attribute-free copy of the label map and stack it along a new
# leading 'metric' dimension, then along a new leading 'month' dimension, so
# that xr_score is indexed as [month, metric, ...].
xr_score = xarray.copy()
xr_score.attrs = {}
list_xr = [xr_score.copy().expand_dims('metric', axis=0) for _ in metrics]
xr_score = xr.concat(list_xr, dim='metric')
xr_score['metric'] = ('metric', metrics)
list_xr = [xr_score.expand_dims('month', axis=0) for _ in months]
xr_score = xr.concat(list_xr, dim='month')
xr_score['month'] = ('month', months)

# Fill every (month, metric) slice with the per-state scores.
for im, month in enumerate(xr_score.month.values):
    for s, metric in enumerate(xr_score.metric.values):
        # Maps the integer label of a state to its score for this slice.
        int_metric = {}
        for STATE in All_states:
            if STATE in df_score_States.index and STATE not in SKIP_STATES:
                # 'NEW MEXICO' -> 'New Mexico', the key style used by
                # enums.us_state_abbrev.
                US_States_format = ' '.join(
                    word.lower().capitalize() for word in STATE.split(' '))
                abbrev = enums.us_state_abbrev[US_States_format]
                integer = States.__dict__[abbrev].real
                score = df_score_States.loc[STATE].loc[month, metric]
                int_metric[integer] = score
        xr_score[im, s] = find_precursors.view_or_replace_labels(
            xarray.copy(),
            list(int_metric),
            list(int_metric.values()))
#%%
fig = plot_maps.plot_corr_maps(xr_score,
                               col_dim='month',
                               row_dim='metric',
                               size=4,
                               clevels=np.arange(-.5,0.51,.1),
                               cbar_vert=0,
                               hspace=.1,
                               add_cfeature='LAKES')
's': size, 'zorder': 2, 'color': colors, 'edgecolors': 'black' } ] ]]
# NOTE(review): the line above closes a literal that is opened before this
# excerpt; it is reproduced unchanged.

# Disabled experiment that relabelled the clusters per region:
# regions= list(np.unique(xrclustered)[~np.isnan(np.unique(xrclustered))])
# if region == 'USCA':
#     dic = {4:3, 3:4}
# else:
#     dic = {2:3, 3:2}
# xrclustered = find_precursors.view_or_replace_labels(xrclustered.copy(), regions,
#                                       [int(dic.get(n, n)) for n in regions])

# Build the boolean mask `mask_cl` from the cluster labels that belong to the
# chosen region. The code treats NaN in the result of view_or_replace_labels
# as "cell not in the requested labels" (presumably the helper blanks every
# other label -- confirm against find_precursors).
# NOTE(review): there is no `else`; for any other `region` value this chain
# leaves `mask_cl` unassigned.
if region == 'USCA':
    # Cells carrying label 1 or 5, passed through
    # make_country_mask.binary_erosion (presumably shrinks the area -- confirm).
    mask_cl_n = find_precursors.view_or_replace_labels(xrclustered.copy(), [1, 5])
    mask_cl_n = make_country_mask.binary_erosion(~np.isnan(mask_cl_n))
    # Cells carrying label 2, used as-is.
    mask_cl_s = ~np.isnan( find_precursors.view_or_replace_labels(xrclustered.copy(), [2]))
    # True where a cell is in neither of the two masks above.
    mask_cl = ~np.logical_or(mask_cl_n, mask_cl_s)
elif region == 'USCAnew':
    # True wherever the result for labels 1 and 4 is NaN.
    mask_cl = find_precursors.view_or_replace_labels(xrclustered.copy(), [1, 4])
    mask_cl = np.isnan(mask_cl)
elif region == 'init':
    # Same pattern as 'USCA': label 3 goes through binary_erosion, label 1 is
    # used as-is, and the final mask is True outside both.
    mask_cl_e = find_precursors.view_or_replace_labels(xrclustered.copy(), [3])
    mask_cl_e = make_country_mask.binary_erosion(~np.isnan(mask_cl_e))
    mask_cl_w = ~np.isnan( find_precursors.view_or_replace_labels(xrclustered.copy(), [1]))
    mask_cl = ~np.logical_or(mask_cl_w, mask_cl_e)
def _lag_period_names(lags, crossyr):
    """Return the month name of the first date of every entry in *lags*.

    Each entry's first element must be a date string. With *crossyr* set, the
    month number is taken from the two characters following the first "-"
    (presumably a "YYYY-MM-..." layout -- confirm against the caller);
    otherwise it is taken from the first two characters of the string.

    Raises:
        KeyError: if the extracted characters are not a month number
            between '01' and '12'.
    """
    month_names = {
        '01': 'January',
        '02': 'February',
        '03': 'March',
        '04': 'April',
        '05': 'May',
        '06': 'June',
        '07': 'July',
        '08': 'August',
        '09': 'September',
        '10': 'October',
        '11': 'November',
        '12': 'December'
    }
    names = []
    for lag in lags:
        first_date = lag[0]
        if crossyr:
            # Two characters right after the first "-".
            start = first_date.find("-") + 1
            month_nr_str = first_date[start:start + 2]
        else:
            # The string starts with the month number.
            month_nr_str = first_date[:2]
        names.append(month_names[month_nr_str])
    return names


def process(rg, lags, fold_method, crossyr):
    """Run the precursor pipeline on *rg* and return it.

    Steps, in order: preprocess the precursors, plot the full target time
    series, define the train/test split, save a plot of the target cluster,
    compute and plot the correlation maps, rename the 'lag' coordinate of
    every precursor to month names, save the label/correlation plots and
    extract the precursor time series.

    Args:
        rg: analysis object providing the methods and attributes used below
            (NOTE(review): presumably an RGCPD instance -- confirm).
        lags: sequence whose entries start with a date string; see
            ``_lag_period_names`` for how the month is extracted.
        fold_method: passed to ``rg.traintest`` as ``method``.
        crossyr: selects how the month number is located inside the date
            strings of *lags*.

    Returns:
        The same *rg* object, after processing.

    Side effects:
        Prints the test years and the precursor-region counts, and writes
        figures into ``rg.path_outsub1``.
    """
    import find_precursors
    import plot_maps

    # Preprocess precursors
    rg.pp_precursors(detrend=True,
                     anomaly=True,
                     selbox=None,
                     format_lon='west_east')

    # ts plot
    rg.df_fullts.plot()

    # define train and test periods
    rg.traintest(method=fold_method, seed=1)
    testyrs = rg._get_testyrs()
    print(testyrs)

    # save target region plot
    target_cluster = int(rg.list_of_name_path[0][0])
    xrclustered = rg.get_clust(format_lon='west_east')['xrclustered']
    fig = plot_maps.plot_labels(
        find_precursors.view_or_replace_labels(xrclustered,
                                               regions=target_cluster))
    # The cluster number has to be formatted into the name; the original
    # string was never interpolated, so every cluster wrote to the same
    # literal 'target_cluster_{target_cluster}.jpeg'.
    fig.savefig(
        os.path.join(rg.path_outsub1,
                     'target_cluster_{}.jpeg'.format(target_cluster)))

    # calculate correlation maps
    rg.calc_corr_maps()

    # show correlation maps
    rg.plot_maps_corr(kwrgs_plot={'clevels': np.arange(-.6, .61, 0.1)})

    # label the 'lag' coordinate with month names instead of lag entries
    periodnames = _lag_period_names(lags, crossyr)
    for precursor in rg.list_for_MI:
        precursor.prec_labels['lag'] = ('lag', periodnames)
        precursor.corr_xr['lag'] = ('lag', periodnames)

    # View correlation regions
    rg.quick_view_labels(mean=True, save=True)
    rg.plot_maps_corr(save=True)

    # Handle precursor regions
    rg.get_ts_prec()
    # how many times each precursor region is found in the different
    # training sets
    count = rg._df_count
    print(count)

    # center lat,lon coordinates and size (in number of gridcells) of the
    # regions; not used further here, but the call is kept so that a failure
    # in the conversion still surfaces at this point.
    df_prec_regions = find_precursors.labels_to_df(
        rg.list_for_MI[0].prec_labels)

    return rg
def _grid_subtitles(months, label, col_wrap):
    """Return row-wise subtitles '<month>, <label>' for a grid of col_wrap columns.

    The last row is padded with '0' entries so that every row holds exactly
    *col_wrap* items.
    """
    import math

    n_rows = int(math.ceil(len(months) / col_wrap))
    subtitles = [[] for _ in range(n_rows)]
    for idx, month in enumerate(months):
        subtitles[idx // col_wrap].append('{}, {}'.format(month, label))
    if subtitles:
        n_missing = col_wrap * n_rows - len(months)
        subtitles[-1].extend(['0'] * n_missing)
    return subtitles


def plot_ss2(agg_level, skillscores, col_wrap, metric=None):
    """Plot maps in which each cluster is coloured by its skill score.

    The clustered label map is loaded from the path returned by
    ``get_list_of_name_path(agg_level, 1)``, replicated for every skill-score
    name and every column of *skillscores*, and in each replica the cluster
    labels are replaced by the matching skill score.

    Args:
        agg_level: forwarded to ``get_list_of_name_path``.
        skillscores: DataFrame with one column per target month and a
            three-level row index; levels 1 and 2 are combined into the
            skill-score names '<level1>_<level2>' (NOTE(review): level 0 is
            presumably the cluster label -- confirm).
        col_wrap: number of columns in the figure grid.
        metric: optional single skill-score name '<level1>_<level2>'; when
            given, only this one is plotted.

    Returns:
        The figure returned by ``plot_maps.plot_corr_maps``.
    """
    import find_precursors

    cluster_nc_path = get_list_of_name_path(agg_level, 1)[0][1]
    ds = core_pp.import_ds_lazy(cluster_nc_path, format_lon='west_east')
    cluster_labels_org = ds.coords['cluster']
    ds = ds['xrclustered']

    # create list of skill score names
    index_levels = skillscores.index.levels
    ss_list = [
        '{}_{}'.format(i, j) for i in index_levels[1] for j in index_levels[2]
    ]
    if metric is not None:
        # only apply single metric
        ss_list = [metric]

    # add dimensions and coordinates: result is [target_month, metric, ...]
    xr_score = ds.copy()
    xr_score.attrs = {}
    xr_score = xr.concat(
        [xr_score.copy().expand_dims('metric', axis=0) for _ in ss_list],
        dim='metric')
    xr_score['metric'] = ('metric', ss_list)
    xr_score = xr.concat([
        xr_score.copy().expand_dims('target_month', axis=0)
        for _ in skillscores.columns
    ],
                         dim='target_month')
    xr_score['target_month'] = ('target_month', skillscores.columns)

    # replace labels with skillscores
    for metric_nr, metric_name in enumerate(xr_score.metric.values):
        # split '<test_or_train>_<score name>' at the first underscore
        test_or_train, _, ss = metric_name.partition("_")
        for month_nr, month in enumerate(xr_score.target_month.values):
            # slice over metric, month in skill score df
            metric_cluster_dict = skillscores[month].xs(
                (test_or_train, ss), level=(1, 2)).to_dict()
            # Clusters without a score become NaN. The original only blanked
            # labels of type np.int32, so labels of any other integer width
            # were left in the map and plotted as if they were scores.
            cluster_labels_new = [
                metric_cluster_dict.get(label, np.nan)
                for label in cluster_labels_org.values
            ]
            xr_score[month_nr,
                     metric_nr] = find_precursors.view_or_replace_labels(
                         ds,
                         regions=list(cluster_labels_org.values),
                         replacement_labels=cluster_labels_new)

    # Subtitles carry the name of the last plotted metric, as before; the
    # loop above no longer overwrites the `metric` argument to get there.
    subtitles = _grid_subtitles(xr_score.target_month.values, ss_list[-1],
                                col_wrap)

    # plot
    fig = plot_maps.plot_corr_maps(xr_score,
                                   col_dim='target_month',
                                   row_dim='metric',
                                   size=4,
                                   clevels=np.arange(-.5, 0.51, .1),
                                   cbar_vert=-0.1,
                                   hspace=-0.2,
                                   subtitles=subtitles,
                                   col_wrap=col_wrap)
    return fig
'fontsize': 18 } }
# NOTE(review): the line above closes a dict that is opened before this
# excerpt; it is reproduced unchanged.
save = True
# Disabled: the built-in correlation-map plot, replaced by the manual
# plotting below.
# rg.plot_maps_corr(var='z500', save=save,
#                   min_detect_gc=min_detect_gc,
#                   kwrgs_plot=kwrgs_plot,
#                   append_str=''.join(map(str, z500_green_bb))+TV+str(cluster_label))

# Plot the correlation map of the first precursor in rg.list_for_MI
# (named z500 here), using the values and mask returned by
# RGCPD._get_sign_splits_masked.
z500 = rg.list_for_MI[0]
xrvals, xrmask = RGCPD._get_sign_splits_masked(z500.corr_xr, min_detect_gc, z500.corr_xr['mask'])
fig = plot_maps.plot_corr_maps(xrvals, xrmask, **kwrgs_plot)

# Outline the target cluster on the first axes: the contour is drawn on the
# NaN mask of the label map restricted to `cluster_label`.
# NOTE(review): rg.get_clust() is called only for its effect on `rg`;
# presumably it populates rg.ds, which is read on the next line -- confirm.
rg.get_clust()
xrclustered = find_precursors.view_or_replace_labels(rg.ds['xrclustered'], cluster_label)
fig.axes[0].contour(xrclustered.longitude, xrclustered.latitude, np.isnan(xrclustered), transform=ccrs.PlateCarree(), levels=[0, 2], linewidths=1, linestyles=['solid'], colors=['white'])

# Save next to the other outputs; the name combines rg.hash and the cluster
# label, and the extension comes from rg.figext.
filename = os.path.join(rg.path_outsub1, 'z500vsmx2t_' + rg.hash + '_' + str(cluster_label))
fig.savefig(filename + rg.figext, bbox_inches='tight')
#%% upon request of reviewer, using a smaller bounding box plot
# kwrgs_plot.update({'drawbox':[(0,0), (155,300,20,73)]})