'affinity': 'jaccard', 'linkage': 'average' }) # xr_temp = xrclustered.sel(longitude= # np.arange(232., 295., .25)).sel(latitude=np.arange(25, 50, .25)).copy() fig = plot_maps.plot_labels(xrclustered, wspace=.04, hspace=-.35, cbar_vert=.09, col_dim='q', row_dim='n_clusters') f_name = 'clustering_dendogram_{}'.format(xrclustered.attrs['hash']) path_fig = os.path.join(path_outmain, f_name + '.pdf') plt.savefig(path_fig, bbox_inches='tight') # dpi auto 600 cl.store_netcdf(xrclustered.to_dataset(name='xrclustered'), filepath=os.path.join(rg.path_outmain, f_name + '.nc')) print('hash', xrclustered.attrs['hash']) print(f'{round(time()-t0, 2)}') #%% # ============================================================================= # Clustering correlation Hierarchical Agglomerative Clustering # ============================================================================= # from time import time # t0 = time() # xrclustered, results = cl.correlation_clustering(var_filename, mask=xr_mask, # kwrgs_load={'tfreq':tfreq, # 'seldates':('06-01', '08-31'), # 'selbox':selbox}, # clustermethodkey='AgglomerativeClustering',
'col_dim': 'tfreq', 'x_ticks': np.arange(240, 300, 20), 'y_ticks': np.arange(0, 61, 10) }) f_name = 'clustering_dendogram_{}'.format(xrclustered.attrs['hash']) + '.pdf' path_fig = os.path.join(rg.path_outmain, f_name) fig.savefig(path_fig, bbox_inches='tight') # dpi auto 600 #%% if region != 'init': # try: # ds_cl_ts = core_pp.get_selbox(ds_cl['xrclusteredall'].sel(q=q, n_clusters=c), # selbox) # ds_new = cl.spatial_mean_clusters(var_filename, # ds_cl_ts, # selbox=selbox) # ds_new['xrclusteredall'] = xrclustered # f_name = 'q{}_nc{}'.format(int(q), int(c)) # except: ds_cl_ts = core_pp.get_selbox( ds_cl['xrclusteredall'].sel(tfreq=t, n_clusters=c), selbox) ds_new = cl.spatial_mean_clusters(var_filename, ds_cl_ts, selbox=selbox) ds_new['xrclusteredall'] = xrclustered f_name = 'tf{}_nc{}'.format(int(t), int(c)) filepath = os.path.join(rg.path_outmain, f_name) cl.store_netcdf(ds_new, filepath=filepath, append_hash='dendo_' + xrclustered.attrs['hash']) TVpath = filepath + '_' + 'dendo_' + xrclustered.attrs['hash'] + '.nc'
# Quick look at `ds_avail`; the result of `.min()` is not stored, so it is only
# visible when the cell is run interactively.
ds_avail.plot(vmin=30)
ds_avail.min()

# Standardise along time: subtract the time mean and divide by the time
# standard deviation.
ds_std = (
    (ds_raw - ds_raw.mean(dim='time'))
    / ds_raw.std(dim='time')
)

# Select a single clustering solution from all computed ones.
linkage = 'ward'
c = 2
xrclustered = xrclusteredall.sel(linkage=linkage, n_clusters=c)

# Spatial mean per cluster, plus a table of the `ts` variable with time as
# index and one column per cluster.
ds = cl.spatial_mean_clusters(ds_std, xrclustered)
df = (
    ds.ts
    .to_dataframe()
    .pivot_table(index='time', columns='cluster')
)['ts']

# Store the result; the file name encodes the linkage and the number of
# clusters, and the hash of the selected clustering is passed as
# `append_hash`.
f_name = 'linkage_{}_nc{}'.format(linkage, int(c))
filepath = os.path.join(path_outmain, f_name)
cl.store_netcdf(
    ds,
    filepath=filepath,
    append_hash='dendo_' + xrclustered.attrs['hash'],
)

#%% Soy bean USDA
# NOTE(review): absolute, user-specific path -- the cell only runs on a
# machine where this file exists.
raw_filename = '/Users/semvijverberg/Dropbox/VIDI_Coumou/Paper3_Sem/GDHY_MIRCA2000_Soy/USDA/usda_soy.nc'
selbox = [250, 290, 28, 50]

# Load the variable named 'variable' within `selbox` and rename dimension
# 'z' to 'time'. Note that this rebinds `ds`, replacing the cluster dataset
# created above.
ds = core_pp.import_ds_lazy(
    raw_filename,
    var='variable',
    selbox=selbox,
).rename({'z': 'time'})
ds.name = 'Soy_Yield'

# Replace the time values by timestamps on 1 January of year (value + 1949).
ds['time'] = pd.to_datetime([f'{y+1949}-01-01' for y in ds.time.values])

# Descriptive metadata.
ds.attrs['dataset'] = 'USDA'
ds.attrs['planting_months'] = 'May/June'
ds.attrs['harvest_months'] = 'October'