# --- Example 1 ---
def pipeline_small(lead_time, return_persistance=False):
    """
    Assemble the (small) feature/label data set on which the Deep
    Ensemble is trained.

    :type lead_time: int
    :param lead_time: Forecast lead time in months.

    :type return_persistance: boolean
    :param return_persistance: If True, additionally return the label at
        observation time (the persistence forecast).

    :returns: Features ``X`` (at observation time), labels ``y`` (at lead
        time), the label time axis ``timey`` and, when requested, the
        persistence label ``y_persistance`` — i.e.
        ``X, y, timey[, y_persistance]``.
    """
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    # climate indices
    oni = reader.read_csv('oni')
    iod = reader.read_csv('iod')
    wwv = reader.read_csv('wwv_proxy')

    # complexity metrics of the SSH-anomaly climate network
    network_ssh = reader.read_statistic('network_metrics',
                                        variable='zos',
                                        dataset='ORAS4',
                                        processed="anom")
    c2_ssh = network_ssh['fraction_clusters_size_2']
    H_ssh = network_ssh['corrected_hamming_distance']

    # zonal wind stress averaged over a western-Pacific box
    taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
    wp_box = taux.loc[dict(lat=slice(2.5, -2.5), lon=slice(120, 160))]
    taux_WP_mean = wp_box.mean(dim='lat').mean(dim='lon')

    # decadal variation of the leading EOF
    pca_dec = reader.read_statistic('pca',
                                    variable='dec_sst',
                                    dataset='ERSSTv5',
                                    processed='anom')['pca1']

    time_lag = 2  # number of lagged feature copies appended below
    shift = 3     # aligns samples with the lead-time definition

    # stack the selected predictors column-wise
    # (disabled candidates kept for reference: iod, taux_WP_mean, c2_ssh)
    feature_unscaled = np.stack(
        (oni,
         oni.index.month,
         wwv,
         H_ssh,
         pca_dec),
        axis=1)

    # standardize every feature column
    feature_scaled = StandardScaler().fit_transform(feature_unscaled)

    # NaNs become 0, which is the column mean after standardization
    Xorg = np.nan_to_num(feature_scaled)

    # drop rows without a label at this lead time, then add the lags
    cutoff = lead_time + shift
    X = include_time_lag(Xorg[:-cutoff, :], max_lag=time_lag)

    # labels and their time axis start offset months into the record
    offset = lead_time + time_lag + shift
    yorg = oni.values
    y = yorg[offset:]
    timey = oni.index[offset:]

    if not return_persistance:
        return X, y, timey

    # label at observation time, aligned with the rows of X
    y_persistance = yorg[time_lag:-cutoff]
    return X, y, timey, y_persistance
# --- Example 2 ---
def pipeline(lead_time, return_persistance=False):
    """
    Assemble the feature/label data set on which the Deep Ensemble is
    trained.

    :type lead_time: int
    :param lead_time: Forecast lead time in months.

    :type return_persistance: boolean
    :param return_persistance: If True, additionally return the label at
        observation time (the persistence forecast).

    :returns: Features ``X`` (at observation time), labels ``y`` (at lead
        time), the label time axis ``timey`` and, when requested, the
        persistence label ``y_persistance`` — i.e.
        ``X, y, timey[, y_persistance]``.
    """
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    # climate indices
    oni = reader.read_csv('oni')
    iod = reader.read_csv('iod')
    wwv = reader.read_csv('wwv_proxy')

    # annual harmonic encoding the seasonal cycle
    cos = np.cos(np.arange(len(oni)) / 12 * 2 * np.pi)

    # Hamming distance of the SSH-anomaly climate network
    network_ssh = reader.read_statistic('network_metrics',
                                        variable='zos',
                                        dataset='ORAS4',
                                        processed="anom")
    H_ssh = network_ssh['corrected_hamming_distance']

    # zonal wind stress averaged over a western-Pacific box
    taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
    wp_box = taux.loc[dict(lat=slice(2.5, -2.5), lon=slice(120, 160))]
    taux_WP_mean = wp_box.mean(dim='lat').mean(dim='lon')

    n_lags = 3  # number of lagged feature copies
    step = 3    # months between consecutive lags
    shift = 3   # aligns samples with the lead-time definition

    # stack the selected predictors column-wise
    predictors = (oni, wwv, iod, cos, taux_WP_mean, H_ssh)
    feature_unscaled = np.stack(predictors, axis=1)

    # standardize each column, then map NaNs to 0 (the post-scaling mean)
    Xorg = np.nan_to_num(StandardScaler().fit_transform(feature_unscaled))

    # drop rows without a label at this lead time, then add the lags
    cutoff = lead_time + shift
    X = include_time_lag(Xorg[:-cutoff, :], n_lags=n_lags, step=step)

    # labels and their time axis start offset months into the record
    offset = lead_time + n_lags * step + shift
    yorg = oni.values
    y = yorg[offset:]
    timey = oni.index[offset:]

    if not return_persistance:
        return X, y, timey

    # label at observation time, aligned with the rows of X
    y_persistance = yorg[n_lags * step:-cutoff]
    return X, y, timey, y_persistance
# --- Example 3 (truncated at the top by the scrape) ---
        c2_ssh,
        pca_dec),
    axis=1)

#feature_unscaled = np.concatenate((feature_unscaled, sst_equator),
#                                 axis=1)

# Standardize every feature column (zero mean, unit variance).
scaler = StandardScaler()
Xorg = scaler.fit_transform(feature_unscaled)

# Missing values become 0, i.e. the column mean after scaling.
Xorg = np.nan_to_num(Xorg)

# Features with a label available vs. the most recent rows used to
# produce a genuine out-of-sample forecast beyond the record's end.
X = Xorg[:-lead_time - shift, :]
futureX = Xorg[-lead_time - shift - time_lag:, :]

# Append time-lagged copies of every feature column.
X = include_time_lag(X, max_lag=time_lag)
futureX = include_time_lag(futureX, max_lag=time_lag)

# Label: the Nino3.4 value lead_time (+ alignment offsets) months later.
yorg = nino34.values
y = yorg[lead_time + time_lag + shift:]

# Time axis of the labels, and the monthly time axis of the forecast
# horizon starting 2019-01 (freq='MS' = month-start stamps).
timey = nino34.index[lead_time + time_lag + shift:]
futuretime = pd.date_range(start='2019-01-01',
                           end=pd.to_datetime('2019-01-01') +
                           pd.tseries.offsets.MonthEnd(lead_time + shift),
                           freq='MS')

# Hold out 2012-2017 as the test period; the rest is training data.
test_indeces = (timey >= '2012-01-01') & (timey <= '2017-12-01')

train_indeces = np.invert(test_indeces)
# --- Example 4 (truncated at the top by the scrape) ---
                             taux_WP_mean,
                             c2_ssh,
                             H_ssh,
                             pca_dec
                             ), axis=1)

# Standardize every feature column (zero mean, unit variance).
scalerX = StandardScaler()
Xorg = scalerX.fit_transform(feature_unscaled)

# Missing values become 0, i.e. the column mean after scaling.
Xorg = np.nan_to_num(Xorg)

# Arrange the feature array: drop the last lead_time + shift rows that
# have no label, then append time-lagged copies of every feature.
X = Xorg[:-lead_time-shift,:]
X = include_time_lag(X, max_lag=time_lag)

# Arrange the label: the ONI value lead_time (+ alignment offsets) later.
yorg = oni.values
y = yorg[lead_time + time_lag + shift:]

# Time axis of the label (target month of each sample).
timey = oni.index[lead_time + time_lag + shift:]

# Hold out 2001-2011 as the test period; everything else trains.
# NOTE(review): the f-string prefixes are redundant (no placeholders).
test_indeces = (timey>=f'2001-01-01') & (timey<=f'2011-12-01')
train_indeces = np.invert(test_indeces)
trainX, trainy = X[train_indeces,:], y[train_indeces]
testX, testy =  X[test_indeces,:], y[test_indeces]
# Deep Ensemble model, fitted on the training split only.
model = DEM(layers=32, l1_hidden=0.001, verbose=1)

model.fit(trainX, trainy)
def pipeline_small(lead_time, return_persistance=False):
    """
    Assemble a reduced feature/label data set on which the Deep Ensemble
    is trained.

    :type lead_time: int
    :param lead_time: Forecast lead time in months.

    :type return_persistance: boolean
    :param return_persistance: If True, additionally return the label at
        observation time (the persistence forecast).

    :returns: Features ``X`` (at observation time), labels ``y`` (at lead
        time), the label time axis ``timey`` and, when requested, the
        persistence label ``y_persistance`` — i.e.
        ``X, y, timey[, y_persistance]``.
    """
    reader = data_reader(startdate='1960-01', enddate='2017-12')

    # climate indices
    oni = reader.read_csv('oni')
    wwv = reader.read_csv('wwv_proxy')

    # annual harmonic encoding the seasonal cycle
    sc = np.cos(np.arange(len(oni)) / 12 * 2 * np.pi)

    # decadal variation of the leading EOF
    pca_dec = reader.read_statistic('pca',
                                    variable='dec_sst',
                                    dataset='ERSSTv5',
                                    processed='anom')['pca1']

    time_lag = 3  # number of lagged feature copies appended below
    shift = 3     # aligns samples with the lead-time definition

    # stack the predictors column-wise
    feature_unscaled = np.stack((oni, sc, wwv, pca_dec), axis=1)

    # standardize each column, then map NaNs to 0 (the post-scaling mean)
    Xorg = np.nan_to_num(StandardScaler().fit_transform(feature_unscaled))

    # drop rows without a label at this lead time, then add the lags
    cutoff = lead_time + shift
    X = include_time_lag(Xorg[:-cutoff, :], max_lag=time_lag)

    # labels and their time axis start offset months into the record
    offset = lead_time + time_lag + shift
    yorg = oni.values
    y = yorg[offset:]
    timey = oni.index[offset:]

    if not return_persistance:
        return X, y, timey

    # label at observation time, aligned with the rows of X
    y_persistance = yorg[time_lag:-cutoff]
    return X, y, timey, y_persistance
# --- Example 6 ---
def pipeline(lead_time, return_persistance=False):
    """
    Assemble the feature/label data set on which the Deep Ensemble is
    trained, persisting the scaled feature matrix for the forecast step.

    :type lead_time: int
    :param lead_time: Forecast lead time in months.

    :type return_persistance: boolean
    :param return_persistance: If True, additionally return the label at
        observation time (the persistence forecast).

    :returns: Features ``X`` (at observation time), labels ``y`` (at lead
        time), the label time axis ``timey`` and, when requested, the
        persistence label ``y_persistance`` — i.e.
        ``X, y, timey[, y_persistance]``.
    """
    # end date comes from the module-level endyr/endmth settings
    reader = data_reader(startdate='1960-01', enddate=endyr+'-'+endmth)

    # climate indices
    oni = reader.read_csv('oni')
    dmi = reader.read_csv('dmi')
    wwv = reader.read_csv('wwv_proxy')

    # annual harmonic encoding the seasonal cycle
    cos = np.cos(np.arange(len(oni))/12*2*np.pi)

    # zonal wind stress averaged over a western-Pacific box
    taux = reader.read_netcdf('taux', dataset='NCEP', processed='anom')
    wp_box = taux.loc[dict(lat=slice(2.5,-2.5), lon=slice(120, 160))]
    taux_WP_mean = wp_box.mean(dim='lat').mean(dim='lon')

    # include values from 3 and 6 months previously as predictors
    n_lags = 3  # number of lagged feature copies
    step = 3    # months between consecutive lags
    shift = 3   # aligns samples with the lead-time definition

    # stack the predictors column-wise
    predictors = (oni, wwv, dmi, cos, taux_WP_mean)
    feature_unscaled = np.stack(predictors, axis=1)

    # standardize each column, then map NaNs to 0 (the post-scaling mean)
    Xorg = np.nan_to_num(StandardScaler().fit_transform(feature_unscaled))
    # persist the full scaled feature matrix for the prediction script
    np.save(join(infodir,'Xorg'), Xorg)

    # drop rows without a label at this lead time, then add the lags
    cutoff = lead_time + shift
    X = include_time_lag(Xorg[:-cutoff, :], n_lags=n_lags, step=step)

    # labels and their time axis start offset months into the record
    offset = lead_time + n_lags*step + shift
    yorg = oni.values
    y = yorg[offset:]
    timey = oni.index[offset:]

    if not return_persistance:
        return X, y, timey

    # label at observation time, aligned with the rows of X
    y_persistance = yorg[n_lags*step:-cutoff]
    return X, y, timey, y_persistance
# --- Example 7 ---
from ninolearn.pathes import modeldir, infodir, preddir
from ninolearn.learn.models.dem import DEM
from ninolearn.learn.fit import decades

from s0_start import start_pred_y, start_pred_m


# =============================================================================
# Getting feature vector
# =============================================================================

# Load the scaled feature matrix that the training pipeline saved.
Xorg = np.load(join(infodir,'Xorg.npy'))
# Rebuild the feature vector exactly as during training: append the
# values from 3 and 6 months previously (n_lags lags, `step` months apart).
n_lags = 3
step = 3
X = include_time_lag(Xorg, n_lags = n_lags, step=step)
X = X[-1:,:] # now use only the latest observation to produce forecast


# =============================================================================
# For each lead time, load ensemble of models and make prediction
# =============================================================================

lead_times = np.load(join(infodir,'lead_times.npy'))
# first row: ensemble-mean forecast; second row: ensemble std per lead time
predictions = np.zeros((2,len(lead_times))) # first row: mean, second row: std

print_header("Making predictions")

for i in np.arange(len(lead_times)):
    print("Lead time "+str(lead_times[i])+" months")
    dem = DEM(layers=1, neurons = 32, dropout=0.05, noise_in=0.0, noise_sigma=0.,