modified_time = time.gmtime(os.path.getmtime(out_dir))
compare_time = time.strptime("21-7-2019 13:00 UTC", "%d-%m-%Y %H:%M %Z")

# skip ensembles that were already trained after the cut-off date
if modified_time > compare_time:
    print("Trained already!")
    continue

# hold out one decade for testing, train on the rest
test_indices = (timey >= f'{1902+decade}-01-01') & (timey <= f'{1911+decade}-12-01')
train_indices = np.invert(test_indices)
trainX, trainy = X[train_indices, :], y[train_indices]

model = DEM()
model.set_parameters(layers=1, dropout=[0.1, 0.5], noise_in=[0.1, 0.5],
                     noise_sigma=[0.1, 0.5], noise_mu=[0.1, 0.5],
                     l1_hidden=[0.0, 0.2], l2_hidden=[0.0, 0.2],
                     l1_mu=[0.0, 0.2], l2_mu=[0.0, 0.2],
                     l1_sigma=[0.0, 0.2], l2_sigma=[0.0, 0.2],
                     lr=[0.0001, 0.01], batch_size=100, epochs=500,
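# The list-valued arguments above are presumably [low, high] ranges for a
# randomized hyperparameter search rather than fixed values. A hedged,
# purely illustrative sketch of how one trial could be drawn from such a
# range (not necessarily DEM's internal logic):
import numpy as np

def sample_hyperparameter(value):
    """Draw a concrete value from a [low, high] range; pass scalars through."""
    if isinstance(value, list):
        low, high = value
        return np.random.uniform(low, high)
    return value

dropout = sample_hyperparameter([0.1, 0.5])  # e.g. one trial for the dropout rate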
                             H_ssh, pca_dec), axis=1)

# scale each feature
scalerX = StandardScaler()
Xorg = scalerX.fit_transform(feature_unscaled)

# set NaNs to 0.
Xorg = np.nan_to_num(Xorg)

# arrange the feature array
X = Xorg[:-lead_time - shift, :]
X = include_time_lag(X, max_lag=time_lag)

# arrange the label
yorg = oni.values
y = yorg[lead_time + time_lag + shift:]

# get the time axis of the label
timey = oni.index[lead_time + time_lag + shift:]

test_indices = (timey >= '2001-01-01') & (timey <= '2011-12-01')
train_indices = np.invert(test_indices)

trainX, trainy = X[train_indices, :], y[train_indices]
testX, testy = X[test_indices, :], y[test_indices]

model = DEM(layers=32, l1_hidden=0.001, verbose=1)
model.fit(trainX, trainy)

#%%
pred = model.predict(testX)
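# `include_time_lag` is not defined in this excerpt. A minimal sketch of
# what it is assumed to do, namely appending time-lagged copies of every
# feature column so the model also sees the previous `max_lag` time steps
# (the helper in the code base may handle the leading rows differently):
import numpy as np

def include_time_lag_sketch(X, max_lag=0):
    """Stack X with its lagged versions; the first max_lag rows are lost."""
    n = X.shape[0]
    lagged = [X[max_lag - lag:n - lag, :] for lag in range(max_lag + 1)]
    return np.concatenate(lagged, axis=1)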
                      pd.tseries.offsets.MonthEnd(lead_time + shift), freq='MS')

test_indices = (timey >= '2012-01-01') & (timey <= '2017-12-01')
train_indices = np.invert(test_indices)

trainX, trainy, traintimey = X[train_indices, :], y[train_indices], timey[train_indices]
testX, testy, testtimey = X[test_indices, :], y[test_indices], timey[test_indices]

#%% =============================================================================
# Deep ensemble
# =============================================================================
model = DEM()
model.set_parameters(layers=1, neurons=16, dropout=[0.0, 0.5], noise_in=[0.0, 0.5],
                     noise_mu=0.0, noise_sigma=0.0, noise_alpha=0.0,
                     l1_hidden=[0.0, 0.2], l2_hidden=[0.0, 0.2],
                     l1_mu=0.0, l2_mu=0.0, l1_sigma=0.0, l2_sigma=0.0, l1_alpha=0.0,
pred_mean_full = np.array([])
pred_std_full = np.array([])
pred_persistence_full = np.array([])
ytrue = np.array([])
timeytrue = pd.DatetimeIndex([])

for j in range(n_decades):
    decade = decades_arr[j]

    # free some memory
    K.clear_session()

    # load the ensemble that was trained with this decade held out
    print(f'Predict: {1902+decade}-01-01 till {1911+decade}-12-01')
    ens_dir = f'ensemble_decade{decade}_lead{lead_time}'
    model = DEM()
    model.load(location=modeldir, dir_name=ens_dir)

    test_indices = (timey >= f'{1902+decade}-01-01') & (timey <= f'{1911+decade}-12-01')
    testX, testy, testtimey = X[test_indices, :], y[test_indices], timey[test_indices]

    pred_mean, pred_std = model.predict(testX)

    # stitch the decade-wise predictions into one full time series
    pred_mean_full = np.append(pred_mean_full, pred_mean)
    pred_std_full = np.append(pred_std_full, pred_std)
    ytrue = np.append(ytrue, testy)
    timeytrue = timeytrue.append(testtimey)
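# `pred_persistence_full` is declared above but not filled in this excerpt.
# A hedged sketch of the persistence baseline assumed by the evaluation:
# the forecast for a target month is simply the last ONI value observed at
# issue time, lead_time months earlier (index names as in the pipeline above):
if lead_time > 0:
    y_persistence = yorg[time_lag + shift:-lead_time]
else:
    y_persistence = yorg[time_lag + shift:]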
def test_DEM_set_parameters():
    """
    This test checks that the model one intends to build with the method
    .set_parameters() of a DEM instance is the one that is actually
    produced in the backend by Keras.
    """
    model = DEM()

    # draw a random hyperparameter configuration
    layers = np.random.randint(1, 5)
    neurons = np.random.randint(10, 500)
    n_features = np.random.randint(10, 500)
    l1_hidden = np.random.uniform(0, 1)
    l2_hidden = np.random.uniform(0, 1)
    l1_mu = np.random.uniform(0, 1)
    l2_mu = np.random.uniform(0, 1)
    l1_sigma = np.random.uniform(0, 1)
    l2_sigma = np.random.uniform(0, 1)
    dropout_rate = np.random.uniform(0, 1)
    noise_in = np.random.uniform(0, 1)
    noise_mu = np.random.uniform(0, 1)
    noise_sigma = np.random.uniform(0, 1)

    model.set_parameters(layers=layers, neurons=neurons, dropout=dropout_rate,
                         noise_in=noise_in, noise_sigma=noise_sigma,
                         noise_mu=noise_mu, l1_hidden=l1_hidden,
                         l2_hidden=l2_hidden, l1_mu=l1_mu, l2_mu=l2_mu,
                         l1_sigma=l1_sigma, l2_sigma=l2_sigma,
                         batch_size=10, n_segments=5, n_members_segment=1,
                         lr=0.001, patience=10, epochs=300, verbose=0, std=True)

    member = model.build_model(n_features)

    # check the input shape
    assert n_features == member.input_shape[1]

    # check the input noise layer
    noise_in_config = member.get_layer(name='noise_input').get_config()
    assert noise_in_config['stddev'] == noise_in

    for i in range(model.hyperparameters['layers']):
        # check the hidden layer
        hidden_config = member.get_layer(name=f'hidden_{i}').get_config()
        assert hidden_config['activation'] == 'relu'
        assert hidden_config['units'] == neurons
        check_regularizer(hidden_config, class_name='L1L2',
                          l1=l1_hidden, l2=l2_hidden)

        # check the dropout layer
        hidden_dropout_config = member.get_layer(
            name=f'hidden_dropout_{i}').get_config()
        assert hidden_dropout_config['rate'] == dropout_rate

    # check the mean output neuron
    mu_config = member.get_layer(name='mu_output').get_config()
    assert mu_config['activation'] == 'linear'
    check_regularizer(mu_config, class_name='L1L2', l1=l1_mu, l2=l2_mu)

    # check the standard deviation output neuron
    sigma_config = member.get_layer(name='sigma_output').get_config()
    assert sigma_config['activation'] == 'softplus'
    check_regularizer(sigma_config, class_name='L1L2', l1=l1_sigma, l2=l2_sigma)

    # check the mu noise layer
    noise_mu_config = member.get_layer(name='noise_mu').get_config()
    assert noise_mu_config['stddev'] == noise_mu

    # check the sigma noise layer
    noise_sigma_config = member.get_layer(name='noise_sigma').get_config()
    assert noise_sigma_config['stddev'] == noise_sigma
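# `check_regularizer` is used above but not defined in this excerpt. A
# minimal sketch of what it is assumed to verify, based on how Keras
# serializes an L1L2 kernel regularizer into a layer config:
import numpy as np

def check_regularizer(layer_config, class_name='L1L2', l1=0.0, l2=0.0):
    """Assert that the layer's kernel regularizer matches the expectation."""
    reg = layer_config['kernel_regularizer']
    assert reg['class_name'] == class_name
    np.testing.assert_allclose(reg['config']['l1'], l1, rtol=1e-6)
    np.testing.assert_allclose(reg['config']['l2'], l2, rtol=1e-6)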
    # add heteroscedastic noise to the underlying signal
    for i in range(len(y)):
        y[i] = yraw[i] + np.random.normal(scale=z[i])

    # process features
    feature_unscaled = np.stack((yraw, z), axis=1)

    # scale each feature
    scalerX = StandardScaler()
    X = scalerX.fit_transform(feature_unscaled)
    return X, y


if __name__ == "__main__":
    X, y = pipeline_example()

    # chronological train/test split (the 80/20 ratio is illustrative)
    n_train = int(0.8 * len(y))
    trainX, trainy = X[:n_train], y[:n_train]
    testX, testy = X[n_train:], y[n_train:]

    model = DEM(layers=1, neurons=16, dropout=0.0, noise_in=0.0, noise_sigma=0.0,
                noise_mu=0.0, l1_hidden=0.0, l2_hidden=0.0, l1_mu=0.0, l2_mu=0.0,
                l1_sigma=0.0, l2_sigma=0.0, lr=0.001, batch_size=100, epochs=5000,
                n_segments=5, n_members_segment=1, patience=100, verbose=0,
                pdf="normal", name="dem_example")

    model.fit(trainX, trainy)
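    # the deep ensemble predicts a mean and a standard deviation for every
    # sample (as in the evaluation scripts above), so each forecast comes
    # with an uncertainty estimate attached
    pred_mean, pred_std = model.predict(testX)
    print(f"first test prediction: {pred_mean[0]:.2f} +/- {pred_std[0]:.2f}")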
# =============================================================================
# For each lead time, load the ensemble of models and make a prediction
# =============================================================================
lead_times = np.load(join(infodir, 'lead_times.npy'))
predictions = np.zeros((2, len(lead_times)))  # first row: mean, second row: std

print_header("Making predictions")

for i in np.arange(len(lead_times)):
    print("Lead time " + str(lead_times[i]) + " months")

    dem = DEM(layers=1, neurons=32, dropout=0.05, noise_in=0.0, noise_sigma=0.0,
              noise_mu=0.0, l1_hidden=0.0, l2_hidden=0.0, l1_mu=0.0, l2_mu=0.0,
              l1_sigma=0.0, l2_sigma=0.0, lr=0.01, batch_size=100, epochs=5000,
              n_segments=5, n_members_segment=3, patience=25, activation='tanh',
              verbose=0, pdf="normal", name="gdnn_ex_pca")

    for j in decades[:-1]:
        dem.load(location=modeldir,
                 dir_name='gdnn_ex_pca_decade' + str(j) + '_lead' + str(lead_times[i]))

    pred = dem.predict(X)
    predictions[0, i] = pred[0][0]  # mean
    predictions[1, i] = pred[1][0]  # std

# =============================================================================
# Save predictions
# =============================================================================

# Translate months to 3-month seasons centered on the central month
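# A hedged sketch of that translation, assuming the usual ENSO convention
# in which each month is labelled by the 3-month season centred on it
# (January -> 'DJF', February -> 'JFM', ..., December -> 'NDJ'):
season_labels = ['DJF', 'JFM', 'FMA', 'MAM', 'AMJ', 'MJJ',
                 'JJA', 'JAS', 'ASO', 'SON', 'OND', 'NDJ']

def season_of(month):
    """Return the 3-month season centred on the given month (1-12)."""
    return season_labels[month - 1]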
pred_mean_full = np.array([])
pred_std_full = np.array([])
pred_persistence_full = np.array([])
ytrue = np.array([])
timeytrue = pd.DatetimeIndex([])

for j in range(n_decades):
    decade = decades_arr[j]

    # free some memory
    K.clear_session()

    # load the ensemble that was trained with this decade held out
    print(f'Predict: {1902+decade}-01-01 till {1911+decade}-12-01')
    ens_dir = f'ensemble_decade{decade}_lead{lead_time}'
    model = DEM()
    model.load(location=modeldir, dir_name=ens_dir)

    test_indices = (timey >= f'{1902+decade}-01-01') & (timey <= f'{1911+decade}-12-01')
    testX, testy, testtimey = X[test_indices, :], y[test_indices], timey[test_indices]

    pred_mean, pred_std = model.predict(testX)
    pred_pers = y_persistence[test_indices]

    # calculate the decadal scores
    decadal_corr[j, i], decadal_p[j, i] = pearsonr(testy, pred_mean)
    decadal_corr_pers[j, i], decadal_p_pers[j, i] = pearsonr(testy, pred_pers)
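# Beyond the decadal scores, the stitched-together series support an
# all-decade skill estimate; a minimal sketch, assuming ytrue and
# pred_mean_full are appended inside the loop as in the script further above:
all_corr, all_p = pearsonr(ytrue, pred_mean_full)
print(f'all-decade correlation: {all_corr:.2f} (p={all_p:.3f})')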