def prep_wwv_proxy():
    """
    Make a WWV proxy index that uses the K-index from Bunge and Clarke (2014)
    for the time period between 1955 and 1979.
    """
    reader_wwv = data_reader(startdate='1980-01', enddate='2018-12')
    wwv = reader_wwv.read_csv('wwv')

    reader_kindex = data_reader(startdate='1955-01', enddate='1979-12')
    kindex = reader_kindex.read_csv('kindex') * 10e12

    wwv_proxy = kindex.append(wwv)
    wwv_proxy.to_csv(join(processeddir, 'wwv_proxy.csv'), header=True)
def calc_warm_pool_edge():
    """
    Calculate the warm pool edge, i.e. the eastern boundary of the region
    where the equatorial SST exceeds 28 degC.
    """
    reader = data_reader(startdate='1948-01', enddate='2020-10',
                         lon_min=120, lon_max=290)  # enddate: was 2018-12
    sst = reader.read_netcdf('sst', dataset='ERSSTv5', processed='')
    sst_eq = sst.loc[dict(latitude=0)]

    warm_pool_edge = np.zeros(sst_eq.shape[0])
    indeces = np.zeros(sst_eq.shape[0])

    # TODO: not very efficient
    for i in range(sst_eq.shape[0]):
        # easternmost grid point that is still warmer than 28 degC
        index = np.argwhere(sst_eq[i].values > 28.).max()
        indeces[i] = index

        # linear estimate of the fractional grid index of the 28 degC
        # isotherm, converted from degrees longitude (2.5 deg grid) to km
        slope = sst_eq[i, index] - sst_eq[i, index - 1]
        intercept28C = (sst_eq[i, index] - 28.) * slope + index
        warm_pool_edge[i] = intercept28C * 2.5 * 111.321

    df = pd.DataFrame(data=warm_pool_edge, index=sst.time.values,
                      columns=['total'])
    df.index.name = 'time'
    df.to_csv(join(processeddir, 'wp_edge.csv'))
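# The loop above estimates where the equatorial SST crosses 28 degC. As a
# hedged, self-contained illustration of that idea, the sketch below applies
# standard linear interpolation to a synthetic SST profile; it demonstrates
# the technique only and may differ in detail from the expression used above.
import numpy as np

sst_profile = np.array([29.5, 29.1, 28.6, 28.2, 27.5, 26.9])  # degC, west->east
index = np.argwhere(sst_profile > 28.).max()  # last point warmer than 28 degC
# fractional grid index of the 28 degC crossing between `index` and `index + 1`
frac = index + (sst_profile[index] - 28.) / (sst_profile[index]
                                             - sst_profile[index + 1])
warm_pool_edge_km = frac * 2.5 * 111.321  # 2.5 deg grid, ~111.321 km per deg
print(round(warm_pool_edge_km, 1))  # ~914.4 km east of the grid origin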
def evaluation_nll(model_name, mean_name='mean', std_name='std',
                   filename=None, start='1963-01', end='2017-12'):
    """
    Evaluate the model using the negative log-likelihood skill for the full
    time series.
    """
    reader = data_reader(startdate=start, enddate=end)

    # scores for the full time series
    nll = np.zeros(n_lead)

    # ONI observation
    obs = reader.read_csv('oni')

    for i in range(n_lead):
        pred_all = reader.read_forecasts(model_name, lead_times[i],
                                         filename=filename)
        mean = pred_all[mean_name]
        std = pred_all[std_name]

        # calculate the all-season score
        nll[i] = nll_gaussian(obs, mean, std)

    return nll
def evaluation_correlation(model_name, variable_name='mean',
                           start='1963-01', end='2017-12'):
    """
    Evaluate the model using the correlation skill for the full time series.

    :type model_name: str
    :param model_name: The name of the model.

    :type variable_name: str
    :param variable_name: The name of the variable which shall be evaluated\
    against the ONI prediction.

    :returns: The correlation skill for the 0, 3, 6, 9, 12 and 15-month lead\
    times and the corresponding p-values.
    """
    reader = data_reader(startdate=start, enddate=end)

    # scores for the full time series
    r = np.zeros(n_lead)
    p = np.zeros(n_lead)

    # ONI observation
    obs = reader.read_csv('oni')

    for i in range(n_lead):
        pred_all = reader.read_forecasts(model_name, lead_times[i])
        pred = pred_all[variable_name]

        # calculate the all-season score
        r[i], p[i] = pearsonr(obs, pred)

    return r, p
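# A hypothetical usage sketch for the evaluation helpers above. It assumes
# the module-level globals `n_lead` and `lead_times` are defined (as used in
# the function bodies) and that forecasts for a model named 'mlr' (an
# illustrative name) were written beforehand so read_forecasts can find them.
r, p = evaluation_correlation('mlr', variable_name='mean')
nll = evaluation_nll('mlr', mean_name='mean', std_name='std')
for lead, r_i, p_i, nll_i in zip(lead_times, r, p, nll):
    print(f"lead {lead:2d} months: r={r_i:.2f} (p={p_i:.3f}), NLL={nll_i:.2f}")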
def evaluation_srmse(model_name, variable_name='mean'):
    """
    Evaluate the model using the standardized root-mean-squared error (SRMSE)
    for the full time series. Standardized means that the RMSE of each season
    is divided by the corresponding standard deviation of the ONI in that
    season (the standard deviation has a seasonal cycle). These seasonal
    SRMSEs are then averaged to obtain the SRMSE of the full time series.

    :type model_name: str
    :param model_name: The name of the model.

    :type variable_name: str
    :param variable_name: The name of the variable which shall be evaluated\
    against the ONI prediction.

    :returns: The standardized RMSE for the 0, 3, 6, 9, 12 and 15-month lead\
    times.
    """
    reader = data_reader(startdate='1963-01', enddate='2017-12')

    # scores for the full time series
    srmse = np.zeros(n_lead)

    # ONI observation
    obs = reader.read_csv('oni')

    for i in range(n_lead):
        pred_all = reader.read_forecasts(model_name, lead_times[i])
        pred = pred_all[variable_name]

        srmse[i] = mean_srmse(obs, pred,
                              obs.index - pd.tseries.offsets.MonthBegin(1))

    return srmse
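# To make the "standardized" part concrete: the sketch below is a toy
# stand-in for the imported mean_srmse (the real implementation lives in
# ninolearn's skill-measure module). It assumes two aligned pandas Series
# and grades each calendar month by its own ONI spread before averaging.
def srmse_sketch(obs, pred):
    """Toy SRMSE: per-month RMSE divided by that month's std, then averaged."""
    months = obs.index.month
    scores = np.zeros(12)
    for m in range(1, 13):
        o = obs[months == m].values
        f = pred[months == m].values
        rmse_m = np.sqrt(np.mean((o - f) ** 2))
        scores[m - 1] = rmse_m / o.std()  # standardize by the seasonal spread
    return scores.mean()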
def evaluation_decadal_nll(model_name, mean_name='mean', std_name='std',
                           filename=None):
    """
    Evaluate the model in the decades 1963-1971, 1972-1981, ..., 2012-2017
    using the negative log-likelihood.
    """
    reader = data_reader(startdate='1963-01', enddate='2017-12')

    # decadal scores
    decadal_nll = np.zeros((n_lead, n_decades - 1))

    # ONI observation
    obs = reader.read_csv('oni')
    obs_time = obs.index

    for i in range(n_lead):
        pred_all = reader.read_forecasts(model_name, lead_times[i], filename)
        pred_mean = pred_all[mean_name]
        pred_std = pred_all[std_name]

        for j in range(n_decades - 1):
            # non-overlapping decades, consistent with the docstring and with
            # evaluation_decadal_correlation
            indeces = (obs_time >= f'{decades[j]}-01-01') & (
                obs_time <= f'{decades[j+1]-1}-12-01')

            decadal_nll[i, j] = nll_gaussian(obs[indeces],
                                             pred_mean[indeces],
                                             pred_std[indeces])

    return decadal_nll
def load_data(self, variable, dataset, processed='anom',
              startyear=1949, endyear=2018,
              lon_min=120, lon_max=280, lat_min=-30, lat_max=30):
    """
    Load data for the PCA analysis from the desired post-processed data set.

    :type variable: str
    :param variable: The variable for which the PCA will be done.

    :type dataset: str
    :param dataset: The data set that should be used for the PCA.

    :type processed: str
    :param processed: Either '', 'anom' or 'normanom'.

    :param startyear: The start year of the time series for which the PCA\
    is done.

    :param endyear: The last year of the time series for which the PCA\
    is done.

    :param lon_min,lon_max: The minimum and maximum values of the longitude\
    grid for which the metrics shall be computed (from 0 to 360 degrees\
    east).

    :param lat_min,lat_max: The minimum and maximum values of the latitude\
    grid for which the metrics shall be computed (from -90 to 90 degrees\
    north).
    """
    self.variable = variable
    self.dataset = dataset
    self.processed = processed

    self.startdate = pd.to_datetime(str(startyear))
    self.enddate = (pd.to_datetime(str(endyear))
                    + pd.tseries.offsets.YearEnd(0))

    self.reader = data_reader(startdate=self.startdate,
                              enddate=self.enddate,
                              lon_min=lon_min, lon_max=lon_max,
                              lat_min=lat_min, lat_max=lat_max)

    data = self.reader.read_netcdf(variable, dataset, processed)

    self.set_eof_array(data)
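# Hedged usage sketch: this method appears to belong to ninolearn's pca class
# (cf. ninolearn.preprocess.pca, used elsewhere in this repository); the
# output filename below is illustrative.
from ninolearn.preprocess.pca import pca

pca_sst = pca(n_components=6)
pca_sst.load_data('sst', 'ERSSTv5', processed='anom',
                  startyear=1949, endyear=2018,
                  lon_min=120, lon_max=280, lat_min=-30, lat_max=30)
pca_sst.compute_pca()
pca_sst.save(extension='.csv', filename='sst_ERSSTv5_anom')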
def __init__(self, variable, dataset, processed='anom',
             threshold=None, edge_density=None,
             startyear=1948, endyear=2018, window_size=12,
             lon_min=120, lon_max=260, lat_min=-30, lat_max=30,
             verbose=0):
    self.variable = variable
    self.dataset = dataset
    self.processed = processed

    self.threshold = threshold
    self.edge_density = edge_density

    self.startyear = str(startyear)
    self.endyear = str(endyear)

    self.startdate = pd.to_datetime(self.startyear)
    self.enddate = (pd.to_datetime(self.endyear)
                    + pd.tseries.offsets.YearEnd(0))

    # the initial analysis window of `window_size` months
    self.window_size = window_size
    self.window_start = self.startdate
    self.window_end = (self.window_start
                       + pd.tseries.offsets.MonthEnd(self.window_size))

    self.lon_min = lon_min
    self.lon_max = lon_max
    self.lat_min = lat_min
    self.lat_max = lat_max

    self.reader = data_reader(startdate=self.window_start,
                              enddate=self.window_end,
                              lon_min=self.lon_min, lon_max=self.lon_max,
                              lat_min=self.lat_min, lat_max=self.lat_max)

    self.initalizeSeries()

    # map the verbosity argument onto the logger level
    if verbose == 0:
        logger.setLevel(logging.DEBUG)
    elif verbose == 1:
        logger.setLevel(logging.INFO)
    elif verbose == 2:
        logger.setLevel(logging.WARNING)
    elif verbose == 3:
        logger.setLevel(logging.ERROR)
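# A small, self-contained illustration of the window arithmetic in __init__
# (plain pandas offsets, nothing project-specific): a 12-month window that
# starts on 1948-01-01 ends on 1948-12-31.
import pandas as pd

window_start = pd.to_datetime('1948')
window_end = window_start + pd.tseries.offsets.MonthEnd(12)
print(window_start.date(), window_end.date())  # 1948-01-01 1948-12-31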
def prep_wwv_proxy():
    """
    Make a WWV proxy index that uses the K-index from Bunge and Clarke (2014)
    for the time period between 1955 and 1979.
    """
    print("Prepare WWV proxy.")

    # determine the last month available in the raw WWV data
    wwv_raw = pd.read_csv(join(rawdir, 'wwv.dat'),
                          delim_whitespace=True, header=4)
    wwv_end = str(wwv_raw['date'].iloc[-1])
    endyr = wwv_end[:4]
    endmth = wwv_end[4:]

    reader_wwv = data_reader(startdate='1980-01',
                             enddate=endyr + '-' + endmth)
    wwv = reader_wwv.read_csv('wwv')

    reader_kindex = data_reader(startdate='1955-01', enddate='1979-12')
    kindex = reader_kindex.read_csv('kindex') * 10e12

    wwv_proxy = kindex.append(wwv)
    wwv_proxy.to_csv(join(processeddir, 'wwv_proxy.csv'), header=True)
def pipeline_noise(lead_time, return_persistance=False):
    """
    Data pipeline for the processing of the data before the MLR is trained.

    :type lead_time: int
    :param lead_time: The lead time in months.

    :type return_persistance: boolean
    :param return_persistance: Return the persistence as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
    time), the target season "timey" (last month) and, if selected, the label
    at observation time "y_persistance". Hence, the output comes as:
    X, y, timey, y_persistance.
    """
    # initialize the reader
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    np.random.seed(0)

    # load data
    oni = reader.read_csv('oni')
    wwv = reader.read_csv('wwv_proxy')
    iod = reader.read_csv('iod')

    # shift the data by 3 months in addition to the lead-time shift (due to
    # the definition of lead time), as in Barnston et al. (2012)
    shift = 3

    # make the feature array: the three indices plus 100 white-noise channels
    Xorg = np.stack((oni, wwv, iod), axis=1)
    for i in range(100):
        random_noise = np.random.normal(size=len(oni)).reshape(len(oni), 1)
        Xorg = np.concatenate((Xorg, random_noise), axis=1)

    X = Xorg[:-lead_time - shift, :]

    # arrange the label
    yorg = oni.values
    y = yorg[lead_time + shift:]

    # get the time axis of the label
    timey = oni.index[lead_time + shift:]

    if return_persistance:
        y_persistance = yorg[:-lead_time - shift]
        return X, y, timey, y_persistance
    else:
        return X, y, timey
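# Sanity-check sketch: features and labels stay aligned because both are
# trimmed by lead_time + shift. The 103 columns are oni, wwv and iod plus
# the 100 white-noise channels.
X, y, timey = pipeline_noise(lead_time=3)
assert X.shape[0] == y.shape[0] == len(timey)
print(X.shape)  # (n_samples, 103)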
def evaluation_seasonal_correlation(model_name, variable_name='mean',
                                    background='all'):
    """
    Evaluate the model in different seasons using the correlation skill.

    :type model_name: str
    :param model_name: The name of the model.

    :type variable_name: str
    :param variable_name: The name of the variable which shall be evaluated\
    against the ONI prediction.

    :returns: The correlation skill for the different seasons and the\
    0, 3, 6, 9, 12 and 15-month lead times and the corresponding p-values\
    for the respective seasons and lead times. The returned arrays have the\
    shape (lead time, season). The season corresponding to the array entry\
    [:, 0] is DJF and to [:, 1] is JFM (and so on).
    """
    reader = data_reader(startdate='1963-01', enddate='2017-12')

    # seasonal scores
    seasonal_r = np.zeros((n_lead, 12))
    seasonal_p = np.zeros((n_lead, 12))

    # ONI observation
    oni = reader.read_csv('oni')

    if background == "el-nino-like":
        obs = oni[(oni.index.year >= 1982) & (oni.index.year <= 2001)]
    elif background == "la-nina-like":
        obs = oni[(oni.index.year < 1982) | (oni.index.year > 2001)]
    elif background == "barnston_2019":
        # verification period of Barnston et al. (2019): 1982-2015
        obs = oni[(oni.index.year >= 1982) & (oni.index.year <= 2015)]
    elif background == "all":
        obs = oni

    obs_time = obs.index

    for i in range(n_lead):
        pred_all = reader.read_forecasts(model_name, lead_times[i]).loc[{
            'target_season': obs_time
        }]
        pred = pred_all[variable_name]

        seasonal_r[i, :], seasonal_p[i, :] = seasonal_correlation(
            obs, pred, obs_time - pd.tseries.offsets.MonthBegin(1))

    return seasonal_r, seasonal_p
def evaluation_decadal_correlation(model_name, variable_name='mean',
                                   start='1963-01', end='2017-12'):
    """
    Evaluate the model in the decades 1963-1971, 1972-1981, ..., 2012-2017
    using the correlation skill.

    :type model_name: str
    :param model_name: The name of the model.

    :type variable_name: str
    :param variable_name: The name of the variable which shall be evaluated\
    against the ONI prediction.

    :returns: The correlation skill for the 0, 3, 6, 9, 12 and 15-month lead\
    times and the corresponding p-values for the respective decades. The\
    returned arrays have the shape (lead time, decades).
    """
    reader = data_reader(startdate=start, enddate=end)

    # decadal scores
    decadal_r = np.zeros((n_lead, n_decades - 1))
    decadal_p = np.zeros((n_lead, n_decades - 1))

    # ONI observation
    obs = reader.read_csv('oni')
    obs_time = obs.index

    for i in range(n_lead):
        pred_all = reader.read_forecasts(model_name, lead_times[i])
        pred = pred_all[variable_name]

        for j in range(n_decades - 1):
            indeces = (obs_time >= f'{decades[j]}-01-01') & (
                obs_time <= f'{decades[j+1]-1}-12-01')

            decadal_r[i, j], decadal_p[i, j] = pearsonr(
                obs[indeces].values, pred[indeces].values)

    return decadal_r, decadal_p
def evaluation_decadal_srmse(model_name, variable_name='mean', decadal=None):
    """
    Evaluate the model in the decades 1963-1971, 1972-1981, ..., 2012-2017
    using the standardized RMSE.

    :type model_name: str
    :param model_name: The name of the model.

    :type variable_name: str
    :param variable_name: The name of the variable which shall be evaluated\
    against the ONI prediction.

    :returns: The SRMSE for the 0, 3, 6, 9, 12 and 15-month lead times in\
    the respective decades. The returned array has the shape (lead time,\
    decades).
    """
    reader = data_reader(startdate='1963-01', enddate='2017-12')

    # decadal scores
    decadal_srmse = np.zeros((n_lead, n_decades - 1))

    # ONI observation
    obs = reader.read_csv('oni')
    obs_time = obs.index

    for i in range(n_lead):
        pred_all = reader.read_forecasts(model_name, lead_times[i])
        pred = pred_all[variable_name]

        for j in range(n_decades - 1):
            # non-overlapping decades, consistent with
            # evaluation_decadal_correlation above
            indeces = (obs_time >= f'{decades[j]}-01-01') & (
                obs_time <= f'{decades[j+1]-1}-12-01')

            decadal_srmse[i, j] = mean_srmse(
                obs[indeces], pred[indeces],
                obs.index[indeces] - pd.tseries.offsets.MonthBegin(1))

    return decadal_srmse
def pipeline_small(lead_time, return_persistance=False):
    """
    Data pipeline for the processing of the data before the Deep Ensemble
    is trained.

    :type lead_time: int
    :param lead_time: The lead time in months.

    :type return_persistance: boolean
    :param return_persistance: Return the persistence as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
    time), the target season "timey" (last month) and, if selected, the label
    at observation time "y_persistance". Hence, the output comes as:
    X, y, timey, y_persistance.
    """
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    # indices
    oni = reader.read_csv('oni')
    iod = reader.read_csv('iod')
    wwv = reader.read_csv('wwv_proxy')

    # network metrics
    network_ssh = reader.read_statistic('network_metrics', variable='zos',
                                        dataset='ORAS4', processed="anom")
    c2_ssh = network_ssh['fraction_clusters_size_2']
    H_ssh = network_ssh['corrected_hamming_distance']

    # wind stress
    taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
    taux_WP = taux.loc[dict(lat=slice(2.5, -2.5), lon=slice(120, 160))]
    taux_WP_mean = taux_WP.mean(dim='lat').mean(dim='lon')

    # decadal variation of the leading EOF
    pca_dec = reader.read_statistic('pca', variable='dec_sst',
                                    dataset='ERSSTv5',
                                    processed='anom')['pca1']

    # time lag
    time_lag = 2

    # shift such that the lead time corresponds to the definition of lead time
    shift = 3

    # process features
    feature_unscaled = np.stack((oni, oni.index.month, wwv,
                                 #iod,
                                 #taux_WP_mean,
                                 #c2_ssh,
                                 H_ssh,
                                 pca_dec), axis=1)

    # scale each feature
    scalerX = StandardScaler()
    Xorg = scalerX.fit_transform(feature_unscaled)

    # set NaNs to 0.
    Xorg = np.nan_to_num(Xorg)

    # arrange the feature array
    X = Xorg[:-lead_time - shift, :]
    X = include_time_lag(X, max_lag=time_lag)

    # arrange the label
    yorg = oni.values
    y = yorg[lead_time + time_lag + shift:]

    # get the time axis of the label
    timey = oni.index[lead_time + time_lag + shift:]

    if return_persistance:
        y_persistance = yorg[time_lag:-lead_time - shift]
        return X, y, timey, y_persistance
    else:
        return X, y, timey
import numpy as np
from sklearn.preprocessing import StandardScaler
from keras import backend as K

from ninolearn.IO.read_processed import data_reader
from ninolearn.plot.evaluation import plot_correlation
from ninolearn.plot.prediction import plot_prediction
from ninolearn.learn.evaluation.skillMeasures import rmse
from ninolearn.learn.models.dem import DEM
from ninolearn.utils import print_header, include_time_lag, basin_means
from ninolearn.pathes import modeldir

K.clear_session()

#%% =============================================================================
# read data
# =============================================================================
reader = data_reader(startdate='1960-01', enddate='2017-12')

# NINO3.4 index
nino34 = reader.read_csv('oni')

#%%
# other indices
iod = reader.read_csv('iod')
wwv = reader.read_csv('wwv_proxy')

# seasonal cycle
sc = np.cos(np.arange(len(nino34)) / 12 * 2 * np.pi)

# SSH network metrics
network_ssh = reader.read_statistic('network_metrics', variable='zos',
                                    dataset='ORAS4', processed="anom")
    1991, 1994, 2002, 2004, 2006, 2009  # , 2015
])
lanina_ep = np.array([1964, 1970, 1973, 1988, 1998, 2007, 2010])
lanina_cp = np.array(
    [1954, 1955, 1967, 1971, 1974, 1975, 1984, 1995, 2000, 2001, 2011])

year = 2008
reader = data_reader(startdate=f'{year}-01', enddate=f'{year}-12',
                     lon_min=30, lon_max=300)
oni = reader.read_csv('oni')

spring = np.array([month in [3, 4, 5] for month in oni.index.month])
summer = np.array([month in [6, 7, 8] for month in oni.index.month])
autumn = np.array([month in [9, 10, 11] for month in oni.index.month])
winter = np.array([month in [11, 12] for month in oni.index.month])
winter_p1 = np.array([month in [1, 2] for month in oni.index.month])

index = winter

# =============================================================================
# Read data
def read_ssta(index, data_dir, get_mask=False, stack_lon_lat=True,
              resolution=2.5, dataset="ERSSTv5", fill_nan=0,
              start_date='1871-01', end_date='2019-12',
              lon_min=190, lon_max=240, lat_min=-5, lat_max=5,
              reader=None):
    """
    Read sea surface temperature anomalies (SSTAs) and apply the rolling
    mean that matches the chosen target index.

    :param index: The target index (e.g. "ONI", "Nino3.4", "ICEN").
    :param start_date: Start of the time series.
    :param end_date: End of the time series.
    :param lon_min: Minimum longitude of the grid (degrees east).
    :param lon_max: Maximum longitude of the grid (degrees east).
    :param lat_min: Minimum latitude of the grid.
    :param lat_max: Maximum latitude of the grid.
    :param reader: If a data_reader is passed, {start,end}_date and
        {lat,lon}_{min,max} will be ignored.
    :return: The (stacked) SSTA array and, if get_mask is set, the
        train mask as well.
    """
    if index in ["Nino3.4", "ONI"]:
        k = 5 if index == "Nino3.4" else 3
    elif index == "ICEN":
        k = 3
    elif index[-3:] == "mon":
        k = int(index[-4])  # e.g. "1mon"
    else:
        raise ValueError("Unknown index")

    if reader is None:
        reader = data_reader(data_dir=data_dir,
                             startdate=start_date, enddate=end_date,
                             lon_min=lon_min, lon_max=lon_max,
                             lat_min=lat_min, lat_max=lat_max)
        check_chosen_coordinates(index, lon_min=lon_min, lon_max=lon_max,
                                 lat_min=lat_min, lat_max=lat_max)

    resolution_suffix = f"{resolution}x{resolution}"
    ssta = reader.read_netcdf('sst', dataset=dataset, processed='anom',
                              suffix=resolution_suffix)
    # single-month SSTAs --> rolling mean over k months
    ssta = ssta.rolling(time=k).mean()[k - 1:]

    if stack_lon_lat:
        lats, lons = ssta.get_index('lat'), ssta.get_index('lon')
        ssta = ssta.stack(cord=['lat', 'lon'])
        ssta.attrs["Lons"] = lons
        ssta.attrs["Lats"] = lats

    if fill_nan is not None:
        if fill_nan == "trim":
            ssta_old_index = ssta.get_index('cord')
            ssta = ssta.dropna(dim='cord')
            print(f"Dropped "
                  f"{len(ssta_old_index) - len(ssta.get_index('cord'))} "
                  f"nodes.")
            # print("Dropped coordinates:",
            #       set(ssta_old_index).difference(set(ssta.get_index("cord"))))
            # print(flattened_ssta.loc["1970-01", (0, 290)]) --> will raise error
        else:
            ssta = ssta.fillna(fill_nan)

    if get_mask:
        index_mask, train_mask = get_index_mask(
            ssta, index=index, flattened_too=True,
            is_data_flattened=stack_lon_lat)
        train_mask = np.array(train_mask)
        return ssta, train_mask

    return ssta
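# Hypothetical usage sketch: `my_data_dir` is a placeholder for wherever the
# processed netCDF files live. With the defaults, the returned array is
# stacked to (time, lat*lon), and the first k-1 months are consumed by the
# rolling mean.
ssta, train_mask = read_ssta('ONI', data_dir='my_data_dir', get_mask=True,
                             resolution=2.5, dataset='ERSSTv5',
                             start_date='1871-01', end_date='2019-12')
print(ssta.shape, train_mask.shape)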
    1987, 1991, 1994, 2002, 2004, 2006, 2009  # , 2015
])
lanina_ep = np.array([1964, 1970, 1973, 1988, 1998, 2007, 2010])
lanina_cp = np.array(
    [1954, 1955, 1967, 1971, 1974, 1975, 1984, 1995, 2000, 2001, 2011])

reader = data_reader(startdate='1974-06', enddate='2017-12',
                     lon_min=30, lon_max=300)
oni = reader.read_csv('oni')

spring = np.array([month in [1, 2, 3] for month in oni.index.month])
summer = np.array([month in [6, 7, 8] for month in oni.index.month])
autumn = np.array([month in [12] for month in oni.index.month])
winter = np.array([month in [12] for month in oni.index.month])
winter_p1 = np.array([month in [1, 2] for month in oni.index.month])
    1991, 1994, 2002, 2004, 2006, 2009  # , 2015
])
lanina_ep = np.array([1964, 1970, 1973, 1988, 1998, 2007, 2010])
lanina_cp = np.array(
    [1954, 1955, 1967, 1971, 1974, 1975, 1984, 1995, 2000, 2001, 2011])

year = 2008
reader = data_reader(startdate='1980-01', enddate='2010-12',
                     lon_min=30, lon_max=300)

# =============================================================================
# Read data
# =============================================================================
taux = reader.read_netcdf('taux', dataset='NCEP', processed='')
taux = taux.sortby('lat', ascending=False)

tauy = reader.read_netcdf('tauy', dataset='NCEP', processed='')
tauy = tauy.sortby('lat', ascending=False)

sst = reader.read_netcdf('sst', dataset='ERSSTv5', processed='')
sst = sst.sortby('lat', ascending=False)

#ssh = reader.read_netcdf('sshg', dataset='GODAS', processed='anom')
#ssh = ssh.sortby('lat', ascending=False)

#olr = - reader.read_netcdf('olr', dataset='NCAR', processed='anom')
import matplotlib.pyplot as plt
from os.path import join

from ninolearn.IO.read_processed import data_reader
from ninolearn.private import plotdir

reader = data_reader(startdate='1950-02', enddate='2018-12', lon_min=30)

oni = reader.read_csv('oni')
iod = reader.read_csv('iod')
iod = iod.rolling(window=3, center=False).mean()

plt.close("all")
plt.subplots(figsize=(8, 3))
plt.fill_between(oni.index, oni, 0, label="ONI")
plt.plot(iod, 'k', label="DMI")
plt.legend(loc=2)
plt.xlim(iod.index[0], iod.index[-1])
plt.ylim(-2.5, 2.5)
plt.xlabel("Time [Year]")
plt.ylabel("Index Value")
plt.tight_layout()
plt.savefig(join(plotdir, 'iod_oni.pdf'))
def pipeline(lead_time, return_persistance=False):
    """
    Data pipeline for the processing of the data before the Deep Ensemble
    is trained.

    :type lead_time: int
    :param lead_time: The lead time in months.

    :type return_persistance: boolean
    :param return_persistance: Return the persistence as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
    time), the target season "timey" (last month) and, if selected, the label
    at observation time "y_persistance". Hence, the output comes as:
    X, y, timey, y_persistance.
    """
    reader = data_reader(startdate='1960-01', enddate=endyr + '-' + endmth)

    # indices
    oni = reader.read_csv('oni')
    dmi = reader.read_csv('dmi')
    wwv = reader.read_csv('wwv_proxy')

    # seasonal cycle
    cos = np.cos(np.arange(len(oni)) / 12 * 2 * np.pi)

    # wind stress
    taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
    taux_WP = taux.loc[dict(lat=slice(2.5, -2.5), lon=slice(120, 160))]
    taux_WP_mean = taux_WP.mean(dim='lat').mean(dim='lon')

    # include values from 3 and 6 months previously as predictor variables
    n_lags = 3
    step = 3

    # shift such that the lead time corresponds to the definition of lead time
    shift = 3

    # process features
    feature_unscaled = np.stack((oni, wwv, dmi, cos, taux_WP_mean), axis=1)

    # scale each feature
    scalerX = StandardScaler()
    Xorg = scalerX.fit_transform(feature_unscaled)

    # set NaNs to 0.
    Xorg = np.nan_to_num(Xorg)

    np.save(join(infodir, 'Xorg'), Xorg)

    # arrange the feature array
    X = Xorg[:-lead_time - shift, :]
    X = include_time_lag(X, n_lags=n_lags, step=step)

    # arrange the label
    yorg = oni.values
    y = yorg[lead_time + n_lags * step + shift:]

    # get the time axis of the label
    timey = oni.index[lead_time + n_lags * step + shift:]

    if return_persistance:
        y_persistance = yorg[n_lags * step:-lead_time - shift]
        return X, y, timey, y_persistance
    else:
        return X, y, timey
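# Hedged usage sketch: split the pipeline output into training and test
# periods along the target-season time axis (the cut date is illustrative).
X, y, timey = pipeline(lead_time=6)
train = timey < '2002-01-01'
X_train, y_train = X[train], y[train]
X_test, y_test = X[~train], y[~train]
print(X_train.shape, X_test.shape)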
from ninolearn.IO.read_processed import data_reader
import matplotlib.pyplot as plt

plt.close("all")

reader = data_reader(startdate='1978-01', enddate='2018-12', lon_min=30)

wp_edge = reader.read_csv('wp_edge', processed='total')
wp_edge_dec = wp_edge.rolling(24, center=False).mean()

olr = reader.read_netcdf('olr', dataset='NCAR', processed='anom')
olr = olr.sortby('lat', ascending=False)
olr_cp = olr.loc[dict(lat=slice(2.5, -2.5),
                      lon=slice(160, 180))].mean(dim='lat',
                                                 skipna=True).mean(dim='lon',
                                                                   skipna=True)
olr_cp_dec = olr_cp.rolling(time=24, center=False).mean()

time = wp_edge.index

fig, ax1 = plt.subplots()
ax1.plot(time, -olr_cp_dec, 'r', label='-OLR (central Pacific)')
ax2 = ax1.twinx()
ax2.plot(time, wp_edge_dec, 'k')
def pipeline_small(lead_time, return_persistance=False):
    """
    Data pipeline for the processing of the data before the Deep Ensemble
    is trained.

    :type lead_time: int
    :param lead_time: The lead time in months.

    :type return_persistance: boolean
    :param return_persistance: Return the persistence as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
    time), the target season "timey" (last month) and, if selected, the label
    at observation time "y_persistance". Hence, the output comes as:
    X, y, timey, y_persistance.
    """
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    # indices
    oni = reader.read_csv('oni')
    wwv = reader.read_csv('wwv_proxy')

    # seasonal cycle
    sc = np.cos(np.arange(len(oni)) / 12 * 2 * np.pi)

    # decadal variation of the leading EOF
    pca_dec = reader.read_statistic('pca', variable='dec_sst',
                                    dataset='ERSSTv5',
                                    processed='anom')['pca1']

    # time lag
    time_lag = 3

    # shift such that the lead time corresponds to the definition of lead time
    shift = 3

    # process features
    feature_unscaled = np.stack((oni, sc, wwv, pca_dec), axis=1)

    # scale each feature
    scalerX = StandardScaler()
    Xorg = scalerX.fit_transform(feature_unscaled)

    # set NaNs to 0.
    Xorg = np.nan_to_num(Xorg)

    # arrange the feature array
    X = Xorg[:-lead_time - shift, :]
    X = include_time_lag(X, max_lag=time_lag)

    # arrange the label
    yorg = oni.values
    y = yorg[lead_time + time_lag + shift:]

    # get the time axis of the label
    timey = oni.index[lead_time + time_lag + shift:]

    if return_persistance:
        y_persistance = yorg[time_lag:-lead_time - shift]
        return X, y, timey, y_persistance
    else:
        return X, y, timey
from ninolearn.preprocess.pca import pca
from ninolearn.IO.read_processed import data_reader

import matplotlib.pyplot as plt

plt.close("all")

# =============================================================================
# Decadal PCAs
# =============================================================================
reader = data_reader(startdate='1955-02', enddate='2018-12',
                     lon_min=120, lon_max=300)
hca = reader.read_netcdf('hca', dataset='NODC', processed='anom')
hca_decadel = hca.rolling(time=60, center=False).mean()
hca_decadel.attrs = hca.attrs.copy()
hca_decadel.name = f'dec_{hca.name}'

pca_hca_decadel = pca(n_components=6)
pca_hca_decadel.set_eof_array(hca_decadel)
pca_hca_decadel.compute_pca()
pca_hca_decadel.plot_eof()
pca_hca_decadel.save(extension='.csv', filename='dec_hca_NODC_anom')

reader = data_reader(startdate='1955-01', enddate='2018-12',
                     lon_min=120, lon_max=300)
sst = reader.read_netcdf('sst', dataset='ERSSTv5', processed='anom')
sst_decadel = sst.rolling(time=60, center=False).mean()
sst_decadel.attrs = sst.attrs.copy()
sst_decadel.name = f'dec_{sst.name}'
import matplotlib.pyplot as plt
import numpy as np
from os.path import join

from ninolearn.IO.read_processed import data_reader
from ninolearn.utils import basin_means, pearson_lag
from ninolearn.private import plotdir

plt.close("all")

start = '1963'
end = '2017'

reader = data_reader(startdate=f'{start}-01', enddate=f'{end}-12', lon_min=30)

oni = reader.read_csv('oni')
wwv = reader.read_csv('wwv_proxy')

taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
taux_WP_mean, taux_CP_mean, taux_EP_mean = basin_means(taux, lat1=7.5,
                                                       lat2=-7.5)

iod = reader.read_csv('iod')

network = reader.read_statistic('network_metrics', variable='zos',
                                dataset='ORAS4', processed="anom")
c2 = network['fraction_clusters_size_2']
H = network['corrected_hamming_distance']

max_lag = 19
lead_time_arr = np.arange(-3, max_lag - 2)
def pipeline(lead_time, return_persistance=False):
    """
    Data pipeline for the processing of the data before the Deep Ensemble
    is trained.

    :type lead_time: int
    :param lead_time: The lead time in months.

    :type return_persistance: boolean
    :param return_persistance: Return the persistence as well.

    :returns: The feature "X" (at observation time), the label "y" (at lead
    time), the target season "timey" (last month) and, if selected, the label
    at observation time "y_persistance". Hence, the output comes as:
    X, y, timey, y_persistance.
    """
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    # indices
    oni = reader.read_csv('oni')
    iod = reader.read_csv('iod')
    wwv = reader.read_csv('wwv_proxy')

    # seasonal cycle
    cos = np.cos(np.arange(len(oni)) / 12 * 2 * np.pi)

    # network metrics
    network_ssh = reader.read_statistic('network_metrics', variable='zos',
                                        dataset='ORAS4', processed="anom")
    H_ssh = network_ssh['corrected_hamming_distance']

    # wind stress
    taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
    taux_WP = taux.loc[dict(lat=slice(2.5, -2.5), lon=slice(120, 160))]
    taux_WP_mean = taux_WP.mean(dim='lat').mean(dim='lon')

    # time lag
    n_lags = 3
    step = 3

    # shift such that the lead time corresponds to the definition of lead time
    shift = 3

    # process features
    feature_unscaled = np.stack((oni, wwv, iod, cos,
                                 taux_WP_mean, H_ssh), axis=1)

    # scale each feature
    scalerX = StandardScaler()
    Xorg = scalerX.fit_transform(feature_unscaled)

    # set NaNs to 0.
    Xorg = np.nan_to_num(Xorg)

    # arrange the feature array
    X = Xorg[:-lead_time - shift, :]
    X = include_time_lag(X, n_lags=n_lags, step=step)

    # arrange the label
    yorg = oni.values
    y = yorg[lead_time + n_lags * step + shift:]

    # get the time axis of the label
    timey = oni.index[lead_time + n_lags * step + shift:]

    if return_persistance:
        y_persistance = yorg[n_lags * step:-lead_time - shift]
        return X, y, timey, y_persistance
    else:
        return X, y, timey
    def data_gen():
        k = 0
        kmax = len(label)
        while k < kmax:
            yield true.loc[time[k]], pred.loc[time[k]], time[k]
            k += 1

    ani = animation.FuncAnimation(fig, update, data_gen, interval=100)
    return ani

#%% =============================================================================
# Data
# =============================================================================
reader = data_reader(startdate='1959-11', enddate='2017-12')

sst = reader.read_netcdf('sst', dataset='ERSSTv5',
                         processed='anom').rolling(time=3).mean()[2:]
oni = reader.read_csv('oni')[2:]

# select
feature = sst.copy(deep=True)
label = sst.copy(deep=True)

# preprocess data
feature_unscaled = feature.values.reshape(feature.shape[0], -1)
label_unscaled = label.values.reshape(label.shape[0], -1)

scaler_f = StandardScaler()
Xorg = scaler_f.fit_transform(feature_unscaled)
        stdp2 = stdp2.where(stdp2_cls != i, other=thresholds[i] + 0.125)
        mode = mode.where(mode_cls != i, other=thresholds[i] + 0.125)

ds_save = xr.Dataset({'median': median,
                      'stdm1': stdm1, 'stdp1': stdp1,
                      'stdm2': stdm2, 'stdp2': stdp2})
ds_save.to_netcdf(join(processeddir, f'{model_name}_prob_forecasts.nc'))

start = '1963-01'
end = '2017-12'
reader = data_reader(startdate=start, enddate=end)
oni = reader.read_csv('oni')

#%% =============================================================================
# Plot Hindcasts
# =============================================================================
def plot_timeseries(lead, ax):
    ax.axhline(0, c='grey', linestyle='--')
    ax.plot(oni, 'k', lw=2)
    ax.set_xlim(oni.index[0], oni.index[-1])

    ax.fill_between(data.target_season.values,
                    stdm2.loc[{'lead': lead}],
                    stdp2.loc[{
from mpl_toolkits.basemap import Basemap
from sklearn import linear_model
from sklearn.metrics import r2_score
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
from os.path import join

from ninolearn.IO.read_processed import data_reader
from ninolearn.private import plotdir

# =============================================================================
# Read
# =============================================================================
reader = data_reader(startdate='1980-01', enddate='2018-11',
                     lon_min=100, lon_max=300)

iod = reader.read_csv('iod')
nino = reader.read_csv('nino3M')

taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
taux = taux.sortby('lat', ascending=False)

sst = reader.read_netcdf('sst', dataset='ERSSTv5', processed='anom')
sst = sst.sortby('lat', ascending=False)

olr = -reader.read_netcdf('olr', dataset='NCAR', processed='anom')
olr = olr.sortby('lat', ascending=False)

# =============================================================================
# Regression analysis
# =============================================================================