def test_timesteps_pass(self, timesteps):
    model = build_LSTM_training(timesteps=timesteps)
    # Iterate over all layers apart from the output layer
    for layer in model.layers[:-1]:
        actual_timesteps = layer.get_config()["batch_input_shape"][1]
        assert actual_timesteps == timesteps
def test_batch_size_pass(self, batch_size):
    model = build_LSTM_training(batch_size=batch_size)
    # Iterate over all layers apart from the output layer
    for layer in model.layers[:-1]:
        actual_batch_size = layer.get_config()["batch_input_shape"][0]
        assert actual_batch_size == batch_size
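# The test methods above receive `timesteps` and `batch_size` as arguments,
# which implies they are driven by pytest parametrization on the enclosing
# test class. A minimal sketch of what that wiring might look like; the
# example values are assumptions, not taken from the source:
#
# @pytest.mark.parametrize("timesteps", [1, 12, 168])
# def test_timesteps_pass(self, timesteps):
#     ...
#
# @pytest.mark.parametrize("batch_size", [1, 32, 256])
# def test_batch_size_pass(self, batch_size):
#     ...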
def make_predictions(input_model_path):
    check_all_needed_files_exist(input_model_path)
    # Pull the most recent hourly Bitcoin prices to predict on
    num_days = 9
    bitcoin = get_last_num_days_hourly_bitcoin_data(num_days)
    bitcoin = bitcoin.price.values.reshape(-1, 1)
    # Apply the same transforms used in training: log, then min-max scaling
    bitcoin = np.log(bitcoin)
    with open(f"{input_model_path}_scaler.pkl", "rb") as f:
        min_max = pickle.load(f)
    bitcoin_preprocessed = min_max.transform(bitcoin)
    bitcoin_ds = make_tf_dataset(
        bitcoin_preprocessed,
        input_seq_length=INPUT_SEQ_LENGTH,
        output_seq_length=0,
        # Only feed in single batches for inference for flexibility
        batch_size=1,
    )
    inference_model = build_LSTM_training(batch_size=1,
                                          timesteps=INPUT_SEQ_LENGTH)
    inference_model.load_weights(f"{input_model_path}_weights.h5")
    preds = inference_model.predict(bitcoin_ds)
    preds = pd.Series(preds.reshape(-1), name="preds")
    return preds
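# Minimal usage sketch for make_predictions. The path below is a hypothetical
# example; the function expects "<path>_scaler.pkl" and "<path>_weights.h5"
# to already exist on disk (as written by train_model further down):
#
# preds = make_predictions("models/lstm_model")
# print(preds.head())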
def test_optimizer(self, optimizer):
    model = build_LSTM_training(optimizer=optimizer)
    # Map the string name passed to build_LSTM_training to the Keras class
    opt_dict = {
        "adam": Adam,
        "rmsprop": RMSprop,
    }
    expected_opt = opt_dict[optimizer.lower()]
    assert isinstance(model.optimizer, expected_opt)
def test_num_layers_type_error(self, num_layers):
    with pytest.raises(TypeError):
        build_LSTM_training(num_layers=num_layers)
def test_num_layers_pass(self, num_layers):
    model = build_LSTM_training(num_layers=num_layers)
    # num_layers counts only the LSTM layers, so add 1 for the output layer
    assert len(model.layers) == num_layers + 1
def test_timesteps_fail_type(self, timesteps):
    with pytest.raises(TypeError):
        build_LSTM_training(timesteps=timesteps)
def test_batch_size_fail_type(self, batch_size):
    with pytest.raises(TypeError):
        build_LSTM_training(batch_size=batch_size)
def test_units_fail_type(self, units):
    with pytest.raises(TypeError):
        build_LSTM_training(units=units)
def test_units_pass(self, units):
    model = build_LSTM_training(units=units)
    # Iterate over all layers apart from the output layer
    for layer in model.layers[:-1]:
        assert layer.units == units
def test_loss(self, loss):
    model = build_LSTM_training(loss=loss)
    assert model.loss == loss
def test_learning_rate(self, learning_rate):
    model = build_LSTM_training(learning_rate=learning_rate)
    # Compare as float32, since that is how Keras stores the learning rate
    assert model.optimizer.learning_rate.numpy() == np.float32(learning_rate)
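# The *_fail_type tests above follow the same parametrized pattern, fed with
# deliberately invalid inputs. A sketch of plausible values (assumptions, not
# taken from the source):
#
# @pytest.mark.parametrize("num_layers", [2.5, "3", [3], None])
# def test_num_layers_type_error(self, num_layers):
#     ...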
def train_model(output_model_name):
    if not output_model_name.endswith(".h5"):
        raise ValueError(
            f"You must pass a model name with a .h5 extension at the end. "
            f"Received: {output_model_name}")

    output_dir = Path("models")
    if not output_dir.exists():
        output_dir.mkdir()

    # Remove .h5 from the end
    output_model_name = output_model_name[:-3]

    bitcoin = load_raw_bitcoin_df()

    # In total we have: ~70% training, 20% val, 10% test
    # (0.9 * 0.77 ~ 0.70 train, 0.9 * 0.23 ~ 0.20 val, 0.10 test)
    train, test = temporal_train_test_split(bitcoin, train_size=0.9)
    train, val = temporal_train_test_split(train, train_size=0.77)

    # Log-transform, then scale with a MinMaxScaler fit on training data only
    train = np.log(train)
    val = np.log(val)
    test = np.log(test)

    min_max = MinMaxScaler()
    train = min_max.fit_transform(train)
    val = min_max.transform(val)
    test = min_max.transform(test)

    # Persist the scaler so inference can apply the same transform
    with open(output_dir / f"{output_model_name}_scaler.pkl", "wb") as f:
        pickle.dump(min_max, f)

    train_ds = make_tf_dataset(
        train,
        input_seq_length=INPUT_SEQ_LENGTH,
        output_seq_length=OUTPUT_SEQ_LENGTH,
        batch_size=BATCH_SIZE_TRAINING,
    )
    val_ds = make_tf_dataset(
        val,
        input_seq_length=INPUT_SEQ_LENGTH,
        output_seq_length=OUTPUT_SEQ_LENGTH,
        batch_size=BATCH_SIZE_TRAINING,
    )
    test_ds = make_tf_dataset(
        test,
        input_seq_length=INPUT_SEQ_LENGTH,
        output_seq_length=OUTPUT_SEQ_LENGTH,
        batch_size=1,
    )

    model = build_LSTM_training(batch_size=BATCH_SIZE_TRAINING,
                                timesteps=INPUT_SEQ_LENGTH)

    early_stop_cb = EarlyStopping(patience=10, restore_best_weights=True)
    callbacks = [early_stop_cb]

    # The datasets are already batched, so batch_size must not be passed to
    # fit(); Keras raises a ValueError if both are given.
    history = model.fit(
        train_ds,
        epochs=10,
        shuffle=False,
        validation_data=val_ds,
        callbacks=callbacks,
    )

    model.save(output_dir / output_model_name)
    model.save_weights(output_dir / f"{output_model_name}_weights.h5")
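# A minimal entry-point sketch for running training end to end. The model
# name below is a hypothetical example; INPUT_SEQ_LENGTH, OUTPUT_SEQ_LENGTH
# and BATCH_SIZE_TRAINING are assumed to be module-level constants defined
# elsewhere in the project.
if __name__ == "__main__":
    train_model("lstm_bitcoin.h5")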