def test_stacks(stack_cls, block_types, expected_rmse):
    fdw = 28
    fw = 7
    x_train, y_train, x_test, y_test = simple_seq_data(
        nrows=1000, freq="1H", fdw=fdw, fw=fw, test_size=0.2)

    inputs = keras.Input(shape=(fdw, 1))
    outputs = keras.layers.Reshape((fdw, ))(inputs)
    stack = stack_cls(
        fdw=fdw,
        fw=fw,
        block_types=block_types,
        block_units=8,
        block_theta_units=8,
        block_kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
    )
    if stack_cls == ResidualInputStack:
        _, forecast = stack([outputs, outputs])
    else:
        _, forecast = stack(outputs)
    model = keras.Model(inputs=inputs, outputs=forecast)
    model.compile(optimizer=keras.optimizers.Adam(0.01),
                  loss=keras.losses.MeanSquaredError())
    model.fit(x_train, y_train, epochs=5, batch_size=32, shuffle=False)
    y_pred = model.predict(x_test)

    assert np.all(np.isfinite(y_pred))
    error = rmse(y_test, y_pred)
    assert error < expected_rmse

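# The helpers these tests rely on (`simple_seq_data`, `rmse`, `get_initializer`,
# `RANDOM_SEED`) come from the suite's shared utilities and are not defined in
# this module. The sketches below illustrate what they are assumed to do; the
# `_sketch_` prefix marks them as illustrative stand-ins, not the project's
# actual implementations (the sine wave, window slicing, and split details are
# assumptions).
def _sketch_get_initializer(name, seed):
    # Build a seeded keras initializer from its string name; only the one
    # case these tests use is handled here.
    if name == "glorot_uniform":
        return keras.initializers.GlorotUniform(seed=seed)
    raise ValueError(f"unsupported initializer in sketch: {name}")


def _sketch_rmse(y_true, y_pred):
    return float(np.sqrt(np.mean((np.asarray(y_true) - np.asarray(y_pred)) ** 2)))


def _sketch_simple_seq_data(nrows, freq, fdw, fw, test_size):
    # A smooth periodic series cut into (fdw -> fw) sliding windows.
    # `freq` is accepted for signature parity but unused in this sketch.
    series = np.sin(2 * np.pi * np.arange(nrows) / 24)
    n_windows = nrows - fdw - fw + 1
    x = np.stack([series[i:i + fdw] for i in range(n_windows)])[..., np.newaxis]
    y = np.stack([series[i + fdw:i + fdw + fw] for i in range(n_windows)])
    split = int(n_windows * (1 - test_size))
    return x[:split], y[:split], x[split:], y[split:]
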
def test_positional_encoding():
    fdw = 28
    fw = 7
    x_train, y_train, x_test, y_test = simple_seq_data(
        nrows=1000, freq="1H", fdw=fdw, fw=fw, test_size=0.2)

    inputs = keras.Input(shape=(fdw, 1))
    outputs = PositionalEncoding(8)(inputs)
    outputs = keras.layers.Concatenate()([inputs, outputs])
    outputs = keras.layers.Flatten()(outputs)
    outputs = keras.layers.Dense(
        fw,
        kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
        activation="linear",
    )(outputs)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=keras.optimizers.Adam(0.01),
                  loss=keras.losses.MeanSquaredError())
    model.fit(x_train, y_train, epochs=5, batch_size=32, shuffle=False)
    y_pred = model.predict(x_test)

    assert np.all(np.isfinite(y_pred))
    error = rmse(y_test, y_pred)
    assert error < 0.5

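# `PositionalEncoding(8)` is the project's layer; its formulation is not shown
# in this file. For orientation, a sketch of the standard sinusoidal encoding
# from "Attention Is All You Need" that such layers commonly implement (an
# assumption -- the project's layer may differ in detail):
def _sketch_sinusoidal_positional_encoding(length, depth):
    # Returns a (length, depth) array: sin on even feature indices, cos on odd.
    positions = np.arange(length)[:, np.newaxis]    # (length, 1)
    dims = np.arange(depth)[np.newaxis, :]          # (1, depth)
    angle_rates = 1.0 / np.power(10000.0, (2 * (dims // 2)) / depth)
    angles = positions * angle_rates                # (length, depth)
    angles[:, 0::2] = np.sin(angles[:, 0::2])
    angles[:, 1::2] = np.cos(angles[:, 1::2])
    return angles
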
def nbeast_residual_test_scenarious():
    args = {
        "block_units": 8,
        "block_theta_units": 8,
        "block_layers": 4,
        "block_kernel_initializer": get_initializer("glorot_uniform", RANDOM_SEED),
        "block_bias_initializer": "zeros",
    }
    tests = [
        [
            StackDef(StackTypes.RESIDUAL_INPUT,
                     block_types=[BlockTypes.GENERIC, BlockTypes.TREND], **args),
            StackDef(StackTypes.RESIDUAL_INPUT,
                     block_types=[BlockTypes.SEASONAL, BlockTypes.TREND], **args),
        ],
        [
            StackDef(StackTypes.RESIDUAL_INPUT,
                     block_types=[BlockTypes.GENERIC, BlockTypes.GENERIC], **args),
        ],
    ]
    return tests

def test_nbeats_residual_parameters_validation():
    args = {
        "block_units": 8,
        "block_theta_units": 8,
        "block_layers": 4,
        "block_kernel_initializer": get_initializer("glorot_uniform", RANDOM_SEED),
        "block_bias_initializer": "zeros",
    }
    with pytest.raises(ValueError) as excinfo:
        NBEATSResidual(
            fdw=28,
            fw=12,
            stacks=[
                StackDef(StackTypes.RESIDUAL_INPUT,
                         block_types=[BlockTypes.GENERIC, BlockTypes.TREND], **args),
                StackDef(StackTypes.LAST_FORWARD,
                         block_types=[BlockTypes.GENERIC, BlockTypes.TREND], **args),
            ],
        )
    assert str(excinfo.value) == (
        "RESIDUAL-INPUT model supports RESIDUAL-INPUT stacks only. "
        "Found: {<StackTypes.LAST_FORWARD: 3>}")

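# A sketch of the validation this test exercises: the RESIDUAL-INPUT model is
# assumed to walk its stack definitions and reject any non-RESIDUAL_INPUT
# stack type, reporting the offending set. The `stack_type` attribute name is
# an assumption about `StackDef`:
def _sketch_validate_residual_stacks(stacks):
    unsupported = {s.stack_type for s in stacks
                   if s.stack_type != StackTypes.RESIDUAL_INPUT}
    if unsupported:
        raise ValueError(
            "RESIDUAL-INPUT model supports RESIDUAL-INPUT stacks only. "
            f"Found: {unsupported}")
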
def test_base_block():
    fdw = 28
    fw = 7
    x_train, y_train, x_test, y_test = simple_seq_data(
        nrows=1000, freq="1H", fdw=fdw, fw=fw, test_size=0.2)

    inputs = keras.Input(shape=(fdw, 1))
    outputs = keras.layers.Reshape((fdw, ))(inputs)
    # Apply the block to the reshaped (fdw,) window.
    _, outputs = Block(
        units=8,
        theta_units=8,
        kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
    )(outputs)
    outputs = keras.layers.Flatten()(outputs)
    outputs = keras.layers.Dense(fw)(outputs)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=keras.optimizers.Adam(0.01),
                  loss=keras.losses.MeanSquaredError())
    model.fit(x_train, y_train, epochs=5, batch_size=32, shuffle=False)
    y_pred = model.predict(x_test)

    assert np.all(np.isfinite(y_pred))
    error = rmse(y_test, y_pred)
    assert error < 0.5

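# `Block` is the project's basic unit. For orientation, a sketch of the
# generic N-BEATS block from Oreshkin et al. (2020): a fully connected stack
# produces expansion coefficients theta, and two linear heads map theta to a
# backcast and a forecast. The layer count and shared-theta choice here are
# assumptions, not the project's actual architecture:
def _sketch_generic_nbeats_block(x, units, theta_units, backcast_size, forecast_size):
    # x: keras tensor of shape (batch, backcast_size)
    h = x
    for _ in range(4):
        h = keras.layers.Dense(units, activation="relu")(h)
    theta = keras.layers.Dense(theta_units, activation="linear")(h)
    backcast = keras.layers.Dense(backcast_size, activation="linear")(theta)
    forecast = keras.layers.Dense(forecast_size, activation="linear")(theta)
    return backcast, forecast
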
def test_multi_head_attention_lookahead_mask():
    fdw = 28
    fw = 7
    attention_dim = 32
    num_heads = 4
    x_train, y_train, x_test, y_test = simple_seq_data(
        nrows=1000, freq="1H", fdw=fdw, fw=fw, test_size=0.2)

    # One (fdw, fdw) upper-triangular mask per training row, with a singleton
    # axis so the final (batch, 1, fdw, fdw) shape broadcasts across heads.
    triu = np.triu(np.ones((fdw, fdw)))
    mask = np.stack([triu for _ in range(x_train.shape[0])])
    mask = np.expand_dims(mask, axis=1)

    inputs = keras.Input(shape=(fdw, 1))
    lookahead_mask = keras.Input(shape=(1, fdw, fdw))
    outputs, attention_weights = MultiHeadAttention(
        attention_dim=attention_dim,
        num_heads=num_heads,
        kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
    )([inputs, inputs, inputs], mask=lookahead_mask)
    outputs = keras.layers.Reshape((fdw * attention_dim * num_heads, ))(outputs)
    outputs = keras.layers.Dense(
        fw,
        kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
        activation="linear",
    )(outputs)
    model = keras.Model(inputs=[inputs, lookahead_mask], outputs=outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(0.01),
        loss=keras.losses.MeanSquaredError(),
    )
    model.fit([x_train, mask], y_train, epochs=5, batch_size=32, shuffle=False)

    # Rebuild the mask for the (smaller) test batch.
    mask = np.stack([triu for _ in range(x_test.shape[0])])
    mask = np.expand_dims(mask, axis=1)
    y_pred = model.predict([x_test, mask])

    assert np.all(np.isfinite(y_pred))
    error = rmse(y_test, y_pred)
    assert error < 0.5

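# How a mask of this shape is commonly folded into scaled dot-product
# attention (a sketch under the convention 1 = attend, 0 = blocked; the
# project's `MultiHeadAttention` may use the opposite convention):
def _sketch_masked_scaled_dot_product_attention(q, k, v, mask=None):
    import tensorflow as tf  # local import so the sketch is self-contained
    d_k = tf.cast(tf.shape(k)[-1], tf.float32)
    logits = tf.matmul(q, k, transpose_b=True) / tf.math.sqrt(d_k)
    if mask is not None:
        # Blocked positions receive a large negative logit so that softmax
        # assigns them near-zero attention weight.
        logits += (1.0 - tf.cast(mask, tf.float32)) * -1e9
    weights = tf.nn.softmax(logits, axis=-1)
    return tf.matmul(weights, v), weights
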
def test_multi_head_attention_padding_mask():
    fdw = 28
    fw = 7
    attention_dim = 32
    num_heads = 4
    x_train, y_train, x_test, y_test = simple_seq_data(
        nrows=1000, freq="1H", fdw=fdw, fw=fw, test_size=0.2)

    # Random 0/1 padding mask of shape (batch, 1, 1, fdw); roughly 70% of the
    # positions are kept.
    random_state = np.random.RandomState(RANDOM_SEED)
    mask = (random_state.random(
        (x_train.shape[0], 1, 1, x_train.shape[1])) > 0.3).astype(int)

    inputs = keras.Input(shape=(fdw, 1))
    padding_mask = keras.Input(shape=(1, 1, fdw))
    outputs, attention_weights = MultiHeadAttention(
        attention_dim=attention_dim,
        num_heads=num_heads,
        kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
    )([inputs, inputs, inputs], mask=padding_mask)
    outputs = keras.layers.Reshape((fdw * attention_dim * num_heads, ))(outputs)
    outputs = keras.layers.Dense(
        fw,
        kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
        activation="linear",
    )(outputs)
    model = keras.Model(inputs=[inputs, padding_mask], outputs=outputs)
    model.compile(
        optimizer=keras.optimizers.Adam(0.01),
        loss=keras.losses.MeanSquaredError(),
    )
    model.fit([x_train, mask], y_train, epochs=5, batch_size=32, shuffle=False)

    mask = (random_state.random(
        (x_test.shape[0], 1, 1, x_test.shape[1])) > 0.3).astype(int)
    y_pred = model.predict([x_test, mask])

    assert np.all(np.isfinite(y_pred))
    error = rmse(y_test, y_pred)
    assert error < 0.5

def test_transformer(num_layers, layer_norm_epsilon, dff):
    fdw = 28
    fw = 7
    x_train, y_train, x_test, y_test = simple_seq_data(
        nrows=1000, freq="1H", fdw=fdw, fw=fw, test_size=0.2)

    transformer = Transformer(
        num_layers=num_layers,
        attention_dim=32,
        num_heads=4,
        hidden_activation="linear",
        dff=dff,
        hidden_kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
        attention_kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
        pwffn_kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
        output_kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
        layer_norm_epsilon=layer_norm_epsilon,
    )
    transformer.compile(loss=keras.losses.MeanSquaredError(),
                        optimizer=keras.optimizers.Adam(0.001))

    decoder_inputs = create_decoder_inputs(y_train, go_token=0)
    transformer.fit([x_train, decoder_inputs], y_train,
                    epochs=5, batch_size=32, verbose=1)

    dec_inp = create_empty_decoder_inputs(x_test.shape[0], go_token=0)
    y_pred, weights = transformer.predict([x_test, dec_inp])

    assert rmse(y_test, y_pred) < 1.1
    assert len(weights["encoder_attention"]) == num_layers
    assert len(weights["decoder_attention"]) == num_layers
    assert len(weights["encoder_decoder_attention"]) == num_layers

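# `create_decoder_inputs` / `create_empty_decoder_inputs` are project helpers.
# A plausible sketch of the convention they appear to implement: teacher
# forcing shifts the targets right behind a go token for training, while
# inference starts from go tokens alone (both are assumptions about the
# helpers, not their actual definitions):
def _sketch_create_decoder_inputs(y, go_token=0):
    go = np.full((y.shape[0], 1), go_token, dtype=y.dtype)
    return np.concatenate([go, y[:, :-1]], axis=1)  # targets shifted right


def _sketch_create_empty_decoder_inputs(n_rows, go_token=0):
    return np.full((n_rows, 1), go_token, dtype=np.float32)
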
def test_multi_head_attention():
    fdw = 28
    fw = 7
    attention_dim = 32
    num_heads = 4
    x_train, y_train, x_test, y_test = simple_seq_data(
        nrows=1000, freq="1H", fdw=fdw, fw=fw, test_size=0.2)

    inputs = keras.Input(shape=(fdw, 1))
    outputs, attention_weights = MultiHeadAttention(
        attention_dim=attention_dim,
        num_heads=num_heads,
        kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
    )([inputs, inputs, inputs])
    outputs = keras.layers.Reshape((fdw * attention_dim * num_heads, ))(outputs)
    outputs = keras.layers.Dense(
        fw,
        kernel_initializer=get_initializer("glorot_uniform", RANDOM_SEED),
        activation="linear",
    )(outputs)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.compile(optimizer=keras.optimizers.Adam(0.01),
                  loss=keras.losses.MeanSquaredError())
    model.fit(x_train, y_train, epochs=5, batch_size=32, shuffle=False)
    y_pred = model.predict(x_test)

    assert np.all(np.isfinite(y_pred))
    error = rmse(y_test, y_pred)
    assert error < 0.5

def nbeats_test_scenarious():
    args = {
        "block_units": 8,
        "block_theta_units": 8,
        "block_layers": 4,
        "block_kernel_initializer": get_initializer("glorot_uniform", RANDOM_SEED),
        "block_bias_initializer": "zeros",
    }
    tests = [
        [
            StackDef(StackTypes.NBEATS_DRESS,
                     block_types=[BlockTypes.GENERIC, BlockTypes.TREND], **args),
        ],
        [
            StackDef(StackTypes.PARALLEL,
                     block_types=[BlockTypes.TREND, BlockTypes.GENERIC], **args),
            StackDef(StackTypes.PARALLEL,
                     block_types=[BlockTypes.TREND, BlockTypes.SEASONAL], **args),
            StackDef(StackTypes.PARALLEL,
                     block_types=[BlockTypes.SEASONAL, BlockTypes.SEASONAL], **args),
        ],
        [
            StackDef(StackTypes.NO_RESIDUAL,
                     block_types=[BlockTypes.TREND, BlockTypes.SEASONAL], **args),
            StackDef(StackTypes.NO_RESIDUAL,
                     block_types=[BlockTypes.SEASONAL, BlockTypes.TREND], **args),
        ],
    ]
    return tests

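# Both scenario builders (`nbeast_residual_test_scenarious` above and this
# one) are presumably consumed through pytest parametrization elsewhere in
# the suite, along these lines (the test name and `NBEATS` signature are
# assumptions, shown commented out so the collected tests are unchanged):
#
# @pytest.mark.parametrize("stacks", nbeats_test_scenarious())
# def test_nbeats(stacks):
#     model = NBEATS(fdw=28, fw=7, stacks=stacks)
#     ...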