def PDO_single_split(s, ds, df_splits, PDO_patterns):
    """Compute the PDO timeseries for a single train/test split.

    The PDO pattern is (re)fitted via ``get_PDO`` on all timesteps falling in
    the training *years*; the precomputed pattern ``PDO_patterns[s]`` is then
    projected on the train and test dates separately and both timeseries are
    concatenated back into one chronologically sorted DataFrame.

    Parameters
    ----------
    s : int
        Index of the train/test split (first level of ``df_splits``' index).
    ds : xr.DataArray
        Field with a ``time`` dimension (presumably SST — confirm with caller).
    df_splits : pd.DataFrame
        MultiIndexed (split, time) with a boolean 'TrainIsTrue' column.
    PDO_patterns : xr.DataArray
        Precomputed PDO pattern per split, indexed by ``s``.

    Returns
    -------
    (pd.DataFrame, xr.DataArray)
        Timeseries in column '0_901_PDO' for all dates of this split, and the
        PDO pattern freshly fitted on the training years.
    """
    # BUG FIX: `splits` and `dates` were read from outer scope (NameError when
    # called standalone); derive them from the function arguments instead.
    splits = df_splits.index.levels[0]
    dates = pd.to_datetime(ds.time.values)
    progress = 100 * (s + 1) / splits.size
    train_mask = df_splits.loc[s]['TrainIsTrue']
    dates_train = train_mask[train_mask].index
    dates_test = train_mask[~train_mask].index
    # The PDO is fitted on every timestep belonging to a training year,
    # not only on the (possibly sub-annual) training dates themselves.
    train_yrs = np.unique(dates_train.year)
    dates_all_train = pd.to_datetime([d for d in dates if d.year in train_yrs])
    n = dates_train.size
    r = int(100 * n / df_splits.loc[s].index.size)
    print(f"\rProgress PDO traintest set {progress}%, trainsize=({n}dp, {r}%)",
          end="")
    PDO_pattern, solver, adjust_sign = get_PDO(ds.sel(time=dates_all_train))
    data_train = find_precursors.calc_spatcov(ds.sel(time=dates_train),
                                              PDO_patterns[s])
    data_test = find_precursors.calc_spatcov(ds.sel(time=dates_test),
                                             PDO_patterns[s])
    df_test = pd.DataFrame(data=data_test.values, index=dates_test,
                           columns=['0_901_PDO'])
    df_train = pd.DataFrame(data=data_train.values, index=dates_train,
                            columns=['0_901_PDO'])
    df = pd.concat([df_test, df_train]).sort_index()
    return (df, PDO_pattern)
def single_split_calc_spatcov(precur, precur_arr: np.ndarray,
                              corr: np.ndarray, labels: np.ndarray,
                              a_wghts: np.ndarray, lags: np.ndarray,
                              use_sign_pattern: bool):
    """Calculate spatial-covariance timeseries for one train/test split.

    For every lag, the correlation map is masked by the labelled regions and
    projected onto ``precur_arr``. Lags without any labelled region yield an
    all-NaN column so the output stays alignable across lags.

    Parameters
    ----------
    precur : object
        Precursor object; reads ``.period_means_array``, ``.precur_arr`` and
        ``.name``.
    precur_arr : np.ndarray
        Precursor field, time on axis 0.
    corr, labels : np.ndarray
        Correlation maps and region labels, one 2-D map per lag.
    a_wghts : np.ndarray
        Area weights passed to ``calc_spatcov``.
    lags : np.ndarray
        Lag values; also determines the number of output columns.
    use_sign_pattern : bool
        If True, project onto sign(pattern) instead of the pattern itself.

    Returns
    -------
    (np.ndarray, list of str)
        Object array with one (n_time, 1) timeseries per lag, and the
        matching column names ``'{lag}..0..{name}_sp'``.
    """
    ts_list = np.zeros((lags.size), dtype=object)
    track_names = []
    for il, lag in enumerate(lags):
        # if lag represents an aggregation period, use the precursor array
        # aggregated over that period instead of the passed-in array
        if precur.period_means_array:
            precur_arr = precur.precur_arr.sel(lag=il)
        # copy to fix ValueError: assignment destination is read-only
        pattern = np.copy(corr[il])
        mask = labels[il]
        pattern[np.isnan(mask)] = np.nan
        if use_sign_pattern:
            pattern = np.sign(pattern)
        if np.isnan(pattern).all():
            # no labelled regions for this variable/split: all-NaN column
            ts_list[il] = np.full((precur_arr.shape[0], 1), np.nan)
        else:
            xrts = find_precursors.calc_spatcov(precur_arr, pattern,
                                                area_wght=a_wghts)
            ts_list[il] = xrts[:, None]
        track_names.append(f'{lag}..0..{precur.name}' + '_sp')
    return ts_list, track_names
def get_ts(self, tfreq_ts=1, df_splits=None):
    """Project the stored EOF patterns onto reloaded data to get timeseries.

    Reloads the precursor field at temporal resolution ``tfreq_ts``, computes
    the spatial covariance with each EOF pattern per split, merges in the
    split's train/test information and stores the result in ``self.df`` as a
    DataFrame with a (split, time) MultiIndex.

    Parameters
    ----------
    tfreq_ts : int, optional
        Temporal aggregation (in days, presumably — confirm against
        ``functions_pp.import_ds_timemeanbins``) for the loaded data.
    df_splits : pd.DataFrame, optional
        Train/test split definition; defaults to ``self.df_splits``.
    """
    if df_splits is None:
        df_splits = self.df_splits
    splits = self.eofs['split'].values
    neofs = self.eofs['eof'].values
    ds = functions_pp.import_ds_timemeanbins(self.filepath,
                                             tfreq=tfreq_ts,
                                             selbox=self.selbox,
                                             start_end_date=self.start_end_date,
                                             start_end_year=self.start_end_year)
    df_data_s = np.zeros((splits.size), dtype=object)
    dates = pd.to_datetime(ds['time'].values)
    for s in splits:
        dfs = pd.DataFrame(columns=neofs, index=dates)
        for i, e in enumerate(neofs):
            pattern = self.eofs.sel(split=s, eof=e)
            data = find_precursors.calc_spatcov(ds, pattern)
            dfs[e] = pd.Series(data.values, index=dates)
        # once all EOF columns are filled, attach this split's train/test info
        dfs = dfs.merge(df_splits.loc[s], left_index=True, right_index=True)
        df_data_s[s] = dfs
    self.df = pd.concat(list(df_data_s), keys=range(splits.size))
def PDO_single_split(s, ds_monthly, ds, df_splits):
    """Fit and project the PDO pattern for one train/test split.

    The PDO EOF is fitted on the monthly data restricted to the training
    years, then projected on the original-frequency data for the train dates
    and — when there is more than one split — for the test dates as well.

    Returns a tuple ``(df, PDO_pattern)`` where ``df`` holds the timeseries
    in column 'PDO' and ``PDO_pattern`` is the fitted pattern.
    """
    splits = df_splits.index.levels[0]
    progress = 100 * (s + 1) / splits.size
    is_train = df_splits.loc[s]['TrainIsTrue']
    dates_train_origtime = is_train[is_train].index
    dates_test_origtime = is_train[~is_train].index
    n = dates_train_origtime.size
    r = int(100 * n / df_splits.loc[s].index.size)
    print(f"\rProgress PDO traintest set {progress}%, trainsize=({n}dp, {r}%)",
          end="")
    # Translate train/test years from the original time axis to monthly dates
    # so the PDO can be fitted on full training years of monthly data.
    train_yrs = np.unique(dates_train_origtime.year)
    dates_monthly = pd.to_datetime(ds_monthly.time.values)
    monthly_in_train_yrs = [d for d in dates_monthly if d.year in train_yrs]
    dates_all_train = pd.to_datetime(monthly_in_train_yrs)
    PDO_pattern, solver, adjust_sign = get_PDO(
        ds_monthly.sel(time=dates_all_train))
    # Project the fitted pattern onto the original-frequency training data.
    data_train = find_precursors.calc_spatcov(
        ds.sel(time=dates_train_origtime).load(), PDO_pattern)
    df_train = pd.DataFrame(data=data_train.values,
                            index=dates_train_origtime,
                            columns=['PDO'])
    if splits.size > 1:
        # With multiple splits there is a genuine test set to project on.
        data_test = find_precursors.calc_spatcov(
            ds.sel(time=dates_test_origtime).load(), PDO_pattern)
        df_test = pd.DataFrame(data=data_test.values,
                               index=dates_test_origtime,
                               columns=['PDO'])
        df = pd.concat([df_test, df_train]).sort_index()
    else:
        df = df_train
    return (df, PDO_pattern)
def loop_get_spatcov(precur, precur_aggr=None, kwrgs_load: dict = None,
                     force_reload: bool = False, lags: list = None):
    """Compute spatial-covariance timeseries for every split and lag.

    For each split, the (optionally sign-converted) correlation pattern,
    masked by the labelled precursor regions, is projected onto the precursor
    field; the per-lag columns are concatenated into one DataFrame per split.

    Parameters
    ----------
    precur : object
        Precursor object; reads ``.use_sign_pattern``, ``.corr_xr``,
        ``.prec_labels``, ``.precur_arr``, ``.lags`` and (on reload) writes
        ``.tfreq`` and ``.area_grid``.
    precur_aggr : int, optional
        If given, reload the precursor at this temporal aggregation.
    kwrgs_load : dict, optional
        Load keyword arguments; only used (and copied) when reloading.
    force_reload : bool, optional
        Force reloading/aggregating the precursor even without precur_aggr.
    lags : list, optional
        Subset of lags to use; defaults to all lags in ``prec_labels``.

    Returns
    -------
    np.ndarray of object
        One ``pd.DataFrame`` per split, columns named
        ``'{lag}..0..{name}_sp'``.
    """
    use_sign_pattern = precur.use_sign_pattern
    corr_xr = precur.corr_xr
    prec_labels = precur.prec_labels
    splits = corr_xr.split
    if lags is not None:
        lags = np.array(lags)  # ensure lag is np.ndarray
        corr_xr = corr_xr.sel(lag=lags).copy()
        prec_labels = prec_labels.sel(lag=lags).copy()
    else:
        lags = prec_labels.lag.values
    dates = pd.to_datetime(precur.precur_arr.time.values)
    oneyr = functions_pp.get_oneyr(dates)
    if oneyr.size == 1:  # single val per year precursor
        tfreq = 365
    else:
        tfreq = (oneyr[1] - oneyr[0]).days
    if precur_aggr is None and not force_reload:
        # use precursor array with the temporal aggregation that was used to
        # create the correlation map. When tfreq=365, aggregation
        # (one-value-per-year) is already done; the period used to aggregate
        # was defined by the lag.
        precur_arr = precur.precur_arr
    else:
        if precur_aggr is not None:
            precur.tfreq = precur_aggr
        precur.load_and_aggregate_precur(kwrgs_load.copy())
        precur_arr = precur.precur_arr
        precur.area_grid = find_precursors.get_area(precur_arr)
        # the reloaded grid may differ from the correlation map's grid
        if precur_arr.shape[-2:] != corr_xr.shape[-2:]:
            print('shape loaded precur_arr != corr map, matching coords')
            corr_xr, prec_labels = functions_pp.match_coords_xarrays(
                precur_arr, *[corr_xr, prec_labels])
    ts_sp = np.zeros((splits.size), dtype=object)
    for s in splits:
        ts_list = np.zeros((lags.size), dtype=list)
        track_names = []
        for il, lag in enumerate(lags):
            # if lag represents an aggregation period, select the matching
            # pre-aggregated field
            if isinstance(precur.lags[il], np.ndarray) and precur_aggr is None:
                precur_arr = precur.precur_arr.sel(lag=il)
            corr_vals = corr_xr.sel(split=s).isel(lag=il)
            mask = prec_labels.sel(split=s).isel(lag=il)
            pattern = corr_vals.where(~np.isnan(mask))
            if use_sign_pattern:
                pattern = np.sign(pattern)
            if np.isnan(pattern.values).all():
                # no regions of this variable and split: all-NaN column
                ts_list[il] = np.full((precur_arr.time.size, 1), np.nan)
            else:
                xrts = find_precursors.calc_spatcov(precur_arr, pattern)
                ts_list[il] = xrts.values[:, None]
            track_names.append(f'{lag}..0..{precur.name}' + '_sp')
        # concatenate timeseries of all lags into one frame per split
        tsCorr = np.concatenate(tuple(ts_list), axis=1)
        dates = pd.to_datetime(precur_arr.time.values)
        ts_sp[s] = pd.DataFrame(tsCorr, index=dates, columns=track_names)
    return ts_sp
# Script section: correlate the spatial covariance of a (wave-6?) pattern with
# the target timeseries at one lag, and append the correlation to `values`.
# NOTE(review): relies on names defined elsewhere in the script
# (rg, selbox, lag, xarray, values, tfreq, start_end_TVdate, event_dates, plt).

# target-variable dates within the target period, and the target timeseries
dates_RV = core_pp.get_subdates(pd.to_datetime(rg.fulltso.time.values),
                                start_end_date=rg.start_end_TVdate)
RV_ts = rg.fulltso.sel(time=dates_RV)
# load the v300 precursor field and restrict it to the selection box
ds_v300 = core_pp.import_ds_lazy(rg.list_precur_pp[1][1])
dslocal = core_pp.get_selbox(ds_v300, selbox=selbox)
datesRW = core_pp.get_subdates(pd.to_datetime(dslocal.time.values),
                               start_end_date=rg.start_end_TVdate)
# shift the dates by `lag` days so the field leads/lags the target
datesRW = datesRW + pd.Timedelta(f'{lag}d')
dslocal = dslocal.sel(time=datesRW)
# NOTE(review): `xarray` here appears to be a DataArray variable shadowing the
# xarray package — confirm; pattern is the lag-mean of its lag=5 selection
wv6local = core_pp.get_selbox(xarray.sel(lag=5), selbox=selbox)
patternlocal = wv6local.mean(dim='lag')
ts = find_precursors.calc_spatcov(dslocal, patternlocal)
# aggregate both the projection timeseries and the target to tfreq-day bins
ts_15, d = functions_pp.time_mean_bins(ts, tfreq,
                                       start_end_date=start_end_TVdate,
                                       closed_on_date=start_end_TVdate[-1])
RV_15, d = functions_pp.time_mean_bins(RV_ts, tfreq,
                                       start_end_date=start_end_TVdate,
                                       closed_on_date=start_end_TVdate[-1])
corr_value = np.corrcoef(ts_15.values.squeeze(),
                         RV_15.values.squeeze())[0][1]
print('corr: {:.2f}'.format(corr_value))
values.append(corr_value)
# plot correlation vs lag; assumes values[1:] has 19 entries (lags -9..9)
# — TODO confirm against the loop this section runs inside
plt.plot(range(-9,10), values[1:])
# df_wv6 = ts_15.to_dataframe(name='wv6p2')
#%%
# Plot the mean SST precursor field over the years of the event dates.
sst = rg.list_for_MI[2]
dates_years = functions_pp.get_oneyr(sst.df_splits.loc[0].index,
                                     *event_dates.year)
sst.precur_arr.sel(time=dates_years).mean(dim='time').plot(vmin=-.3, vmax=.3,
                                                           cmap=plt.cm.RdBu_r)
def PDO_temp(filename, ex, df_splits=None):
    #%%
    '''
    Compute the PDO pattern and timeseries per train/test split.

    The PDO is calculated based upon all data points in the training years.
    Subsequently, the PDO pattern is projected on sst.sel(time=dates_train)
    to enable retrieving the PDO timeseries on a subset of the year. It is
    similarly also projected on dates_test. Definition from
    https://climatedataguide.ucar.edu/climate-data/pacific-decadal-oscillation-pdo-definition-and-indices

    Parameters
    ----------
    filename : str
        Path of the (SST, presumably — confirm with caller) dataset to load.
    ex : dict
        Experiment settings; reads 'tfreq' and, when df_splits is None,
        'RV_name' and the corresponding RV entry.
    df_splits : pd.DataFrame, optional
        MultiIndexed (split, time) with a boolean 'TrainIsTrue' column;
        generated via functions_pp.rand_traintest_years when omitted
        (note: that call also rebinds `ex`).

    Returns
    -------
    pd.DataFrame
        Timeseries in column '0_901_PDO', MultiIndexed by (split, time).
    '''
    if df_splits is None:
        RV = ex[ex['RV_name']]
        df_splits, ex = functions_pp.rand_traintest_years(RV, ex)
    # North-Pacific domain used to define the PDO
    kwrgs_pp = {
        'selbox': {
            'la_min': 20,  # select domain in degrees east
            'la_max': 65,
            'lo_min': 115,
            'lo_max': 250
        },
        'format_lon': 'only_east'
    }
    ds = core_pp.import_ds_lazy(filename, **kwrgs_pp)
    # aggregate to the experiment's temporal resolution if needed
    to_freq = ex['tfreq']
    if to_freq != 1:
        ds, dates = functions_pp.time_mean_bins(ds, ex, to_freq=to_freq,
                                                seldays='all')
        ds['time'] = dates
    dates = pd.to_datetime(ds.time.values)
    splits = df_splits.index.levels[0]
    # pre-allocate one PDO pattern per split
    data = np.zeros((splits.size, ds.latitude.size, ds.longitude.size))
    PDO_patterns = xr.DataArray(
        data,
        coords=[splits, ds.latitude.values, ds.longitude.values],
        dims=['split', 'latitude', 'longitude'])
    list_splits = []
    for s in splits:
        progress = 100 * (s + 1) / splits.size
        dates_train = df_splits.loc[s]['TrainIsTrue'][df_splits.loc[s]
                                                      ['TrainIsTrue']].index
        # the PDO is fitted on every timestep of the training *years*
        train_yrs = np.unique(dates_train.year)
        dates_all_train = pd.to_datetime(
            [d for d in dates if d.year in train_yrs])
        dates_test = df_splits.loc[s]['TrainIsTrue'][
            ~df_splits.loc[s]['TrainIsTrue']].index
        n = dates_train.size
        r = int(100 * n / df_splits.loc[s].index.size)
        print(
            f"\rProgress PDO traintest set {progress}%, trainsize=({n}dp, {r}%)",
            end="")
        PDO_patterns[s], solver, adjust_sign = get_PDO(
            ds.sel(time=dates_all_train))
        # fill coastal/masked gaps so the projection covers the full grid
        PDO_patterns[s] = PDO_patterns[s].interpolate_na(dim='longitude')
        data_train = find_precursors.calc_spatcov(ds.sel(time=dates_train),
                                                  PDO_patterns[s])
        data_test = find_precursors.calc_spatcov(ds.sel(time=dates_test),
                                                 PDO_patterns[s])
        df_test = pd.DataFrame(data=data_test.values, index=dates_test,
                               columns=['0_901_PDO'])
        df_train = pd.DataFrame(data=data_train.values, index=dates_train,
                                columns=['0_901_PDO'])
        # train and test timeseries interleave back to chronological order
        df = pd.concat([df_test, df_train]).sort_index()
        list_splits.append(df)
    df_PDO = pd.concat(list_splits, axis=0, keys=splits)
    #%%
    return df_PDO