def test_regular(device):
    """
    Fit a BenchmarkLSTM-based TimeSeriesPredictor on a FlightSeriesDataset.

    The wall-clock time from just before predictor construction until the end
    of ``fit`` is printed, followed by the R2 score on the fitted dataset,
    which must be greater than -20.
    """
    cuda_check(device)
    t_begin = time.time()

    network = BenchmarkLSTM(hidden_dim=16)
    stopper = EarlyStopping(patience=50)
    splitter = CVSplit(10)
    tsp = TimeSeriesPredictor(
        network,
        lr=1e-3,
        lambda1=1e-8,
        optimizer__weight_decay=1e-8,
        iterator_train__shuffle=True,
        early_stopping=stopper,
        max_epochs=250,
        train_split=splitter,
        optimizer=Adam,
        device=device,
    )

    # Window sizes: 24 past steps plus 12 future steps per pattern.
    n_past = 24
    n_future = 12
    n_total = n_past + n_future
    # NOTE(review): the second positional argument here is the *past* length,
    # whereas sibling tests pass the future length -- confirm this is intended.
    flights = FlightSeriesDataset(n_total, n_past, n_total, stride=1)
    tsp.fit(flights)

    t_finish = time.time()
    elapsed = timedelta(seconds=t_finish - t_begin)
    print(f"Fitting in {device} time delta: {elapsed}")

    mean_r2_score = tsp.score(tsp.dataset)
    print(f"Achieved R2 score: {mean_r2_score}")
    assert mean_r2_score > -20
def __init__(self, name: str, args: TrendArgs):
    """
    Store the name and arguments, build the predictor and optionally load it.

    :param name: identifier used to build the checkpoint file name
        (``<args.model_path>/<name>.pt``).
    :param args: settings object; ``n_hidden``, ``seq_window``, ``n_layers``,
        ``dropout``, ``model_path`` and ``load_model`` are read here.
    """
    self.args = args
    self.name = name

    # Collect the predictor hyper-parameters before handing them over.
    predictor_options = dict(
        n_hidden=args.n_hidden,
        window=args.seq_window,
        n_layers=args.n_layers,
        dropout=args.dropout,
    )
    self.model = TimeSeriesPredictor(**predictor_options)
    self.model_path = f"{args.model_path}/{name}.pt"

    # Nothing more to do unless previously saved weights were requested.
    if not args.load_model:
        return
    self.load_model()
def test_train_loss_monitor_no_train_split():
    """
    Fit with early stopping on ``train_loss`` and ``train_split=None``.

    The predictor is trained on a FlightsDataset and its R2 score on the
    fitted dataset must be greater than -300.
    """
    network = BenchmarkLSTM(hidden_dim=10)
    stopper = EarlyStopping(monitor='train_loss', patience=15)
    tsp = TimeSeriesPredictor(
        network,
        early_stopping=stopper,
        max_epochs=150,
        train_split=None,
        optimizer=torch.optim.Adam,
    )

    flights = FlightsDataset()
    tsp.fit(flights)

    r2_mean = tsp.score(tsp.dataset)
    assert r2_mean > -300
def test_train_loss_monitor(user_name, user_password):
    """
    Fit with early stopping on ``train_loss`` using the credentialed dataset.

    The dataset is obtained through ``_get_dataset(user_name, user_password)``
    and the R2 score on the fitted dataset must be greater than -300.
    """
    network = BenchmarkLSTM(hidden_dim=10)
    stopper = EarlyStopping(monitor='train_loss', patience=15)
    # train_split is deliberately not passed, so the predictor's default is
    # used (the original note gives it as skorch.dataset.CVSplit(5)).
    tsp = TimeSeriesPredictor(
        network,
        early_stopping=stopper,
        max_epochs=150,
        optimizer=torch.optim.Adam,
    )

    training_data = _get_dataset(user_name, user_password)
    tsp.fit(training_data)

    r2_mean = tsp.score(tsp.dataset)
    assert r2_mean > -300
def test_lstm_tsp_fitting_oze(user_name, user_password):
    """
    Fit a BenchmarkLSTM predictor for five epochs on the credentialed dataset.

    The dataset is obtained through ``_get_dataset(user_name, user_password)``
    and the R2 score on the fitted dataset must be greater than -300.
    """
    network = BenchmarkLSTM(hidden_dim=64)
    # train_split is deliberately not passed, so the predictor's default is
    # used (the original note gives it as skorch.dataset.CVSplit(5)).
    tsp = TimeSeriesPredictor(
        network,
        max_epochs=5,
        optimizer=torch.optim.Adam,
    )

    tsp.fit(_get_dataset(user_name, user_password))

    r2_mean = tsp.score(tsp.dataset)
    assert r2_mean > -300
def test_transformer_tsp(device):
    """
    Univariate test: fit a default Transformer on a FlightsDataset.

    The score on the fitted dataset must be greater than -1. The elapsed time
    (construction, fit and scoring) is printed only when the assertion holds.
    """
    cuda_check(device)
    t_begin = time.time()

    network = Transformer()
    tsp = TimeSeriesPredictor(
        network,
        max_epochs=50,
        train_split=None,
        device=device,
    )
    flights = FlightsDataset()
    tsp.fit(flights)

    achieved = tsp.score(tsp.dataset)
    assert achieved > -1

    elapsed = timedelta(seconds=time.time() - t_begin)
    print(f"Fitting in {device} time delta: {elapsed}")
def test_quantum_lstm_tsp_fitting(device):
    """
    Fit a default QuantumLSTM predictor on a FlightsDataset.

    Only the ``fit`` call is timed; the elapsed time is printed and the R2
    score on the fitted dataset must be greater than -10.
    """
    cuda_check(device)

    network = QuantumLSTM()
    tsp = TimeSeriesPredictor(
        network,
        lr=1E-1,
        max_epochs=50,
        train_split=None,
        optimizer=Adam,
        device=device,
    )

    t_begin = time.time()
    tsp.fit(FlightsDataset())
    t_finish = time.time()

    elapsed = timedelta(seconds=t_finish - t_begin)
    print("Fitting in {} time delta: {}".format(device, elapsed))

    r2_mean = tsp.score(tsp.dataset)
    assert r2_mean > -10
def test_no_train_split():
    """
    Expect a ValueError when default early stopping meets ``train_split=None``.

    The predictor is constructed with ``EarlyStopping()`` (default arguments)
    and no train split; construction must raise ``ValueError`` whose message
    matches the expected text.
    """
    # pylint: disable=line-too-long
    expected_message = 'Select a valid train_split or disable early_stopping! A valid train_split needs to be selected when valid_loss monitor is selected as early stopping criteria.'

    with pytest.raises(ValueError) as error:
        TimeSeriesPredictor(
            BenchmarkLSTM(hidden_dim=16),
            early_stopping=EarlyStopping(),
            max_epochs=500,
            train_split=None,
            optimizer=torch.optim.Adam,
        )

    # Checked after the context manager exits, once the exception is captured.
    assert error.match(expected_message)
def test_transformer_tsp_multisamples(device):
    """
    Multivariate test: fit a Transformer on a FlightSeriesDataset with a test set.

    After fitting, the R2 score on the fitted dataset must be greater than
    -0.5. One randomly chosen sample of the generated test split is then
    predicted and its R2 score must be greater than -1.
    """
    cuda_check(device)
    t_begin = time.time()

    network = Transformer(d_model=12)
    stopper = EarlyStopping(patience=100)
    splitter = CVSplit(10)
    tsp = TimeSeriesPredictor(
        network,
        lr=1e-5,
        lambda1=1e-8,
        optimizer__weight_decay=1e-8,
        iterator_train__shuffle=True,
        early_stopping=stopper,
        max_epochs=500,
        train_split=splitter,
        optimizer=Adam,
        device=device,
    )

    # Window sizes: 24 past steps plus 12 future steps per pattern.
    n_past = 24
    n_future = 12
    n_total = n_past + n_future
    flights = FlightSeriesDataset(
        n_total,
        n_future,
        n_total,
        stride=1,
        generate_test_dataset=True,
    )
    tsp.fit(flights)

    t_finish = time.time()
    elapsed = timedelta(seconds=t_finish - t_begin)
    print(f"Fitting in {device} time delta: {elapsed}")

    r2_mean = tsp.score(tsp.dataset)
    assert r2_mean > -0.5

    # Spot-check a single random sample from the held-out test split.
    predictions = tsp.predict(flights.test.x)
    sample = np.random.randint(0, len(flights.test.x))
    expected = flights.test.y[sample, :, :]
    predicted = predictions[sample, :, :]
    r2s = r2_score(expected, predicted)
    assert r2s > -1
    print(f"Final R2 score: {r2s}")
def test_main(stride, test_main_context):
    """
    Fit a BenchmarkLSTM predictor using settings taken from a per-stride context.

    ``test_main_context(stride)`` returns a mapping from which the pattern
    lengths, learning rate, number of CV splits, ``except_last_n`` and both
    score thresholds are read. The mean R2 score on the fitted dataset and the
    R2 score of one random test sample must exceed their thresholds.
    """
    context = test_main_context(stride)
    n_past = context['past_pattern_length']
    n_future = context['future_pattern_length']
    n_total = n_past + n_future

    network = BenchmarkLSTM(
        initial_forget_gate_bias=1,
        hidden_dim=7,
        num_layers=1,
    )
    tsp = TimeSeriesPredictor(
        network,
        lr=context['lr'],
        lambda1=1e-8,
        optimizer__weight_decay=1e-8,
        iterator_train__shuffle=True,
        early_stopping=EarlyStopping(patience=100),
        max_epochs=500,
        train_split=CVSplit(context['n_cv_splits']),
        optimizer=Adam,
    )

    flights = FlightSeriesDataset(
        n_total,
        n_future,
        context['except_last_n'],
        stride=stride,
        generate_test_dataset=True,
    )
    tsp.fit(flights)

    r2_mean = tsp.score(tsp.dataset)
    assert r2_mean > context['mean_r2_score']

    # Spot-check a single random sample from the held-out test split.
    predictions = tsp.predict(flights.test.x)
    sample = np.random.randint(0, len(flights.test.x))
    expected = flights.test.y[sample, :, :]
    predicted = predictions[sample, :, :]
    r2s = r2_score(expected, predicted)
    print("Final R2 score: {}".format(r2s))
    assert r2s > context['final_r2_score']
def test_quantum_lstm_tsp_forecast(device):
    """
    Fit a QuantumLSTM predictor and check its forecast of the held-back months.

    Training uses a FlightsDataset built with ``except_last_n`` set to 24; the
    R2 score on the fitted dataset must be greater than -5. A forecast of 24
    steps is then compared, for one random training index, with the last 24
    outputs of the full dataset; that R2 score must be greater than -60.
    """
    cuda_check(device)

    network = QuantumLSTM(hidden_dim = 2)
    stopper = EarlyStopping(patience=100, monitor='train_loss')
    tsp = TimeSeriesPredictor(
        network,
        max_epochs=250,
        lr = 1e-4,
        early_stopping=stopper,
        train_split=None,
        optimizer=Adam,
        device=device,
    )

    full_dataset = FlightsDataset()
    # leave last N months for error assertion
    held_back = 24

    t_begin = time.time()
    training_data = FlightsDataset(pattern_length = 120, except_last_n = held_back)
    tsp.fit(training_data)
    t_finish = time.time()

    elapsed = timedelta(seconds = t_finish - t_begin)
    print(f"Fitting in {device} time delta: {elapsed}")

    r2_mean = tsp.score(tsp.dataset)
    assert r2_mean > -5

    forecasted, _ = tsp.forecast(held_back)

    # Select any training example just for comparison
    sample = np.random.randint(0, len(tsp.dataset))
    _, full_targets = full_dataset[sample]

    expected = full_targets[-held_back:, :]           # only the known future outputs
    predicted = forecasted[sample, -held_back:, :]    # only the last N predicted outputs
    r2s = r2_score(expected, predicted)
    assert r2s > -60
class Company:
    """
    Price data, scaler, train/test sequences and predictor for one ticker.

    ``name`` is used both as the symbol passed to ``yf.download`` and as the
    stem of the CSV and checkpoint file names.
    """

    # Scaler created in init_train_test_data and handed to norm_data.
    scaler: MinMaxScaler
    # Normalised train / test series produced by norm_data.
    train_data: torch.Tensor
    test_data: torch.Tensor
    # (input window, target) pairs produced by create_inout_sequences.
    train_sequences: List[Tuple[torch.Tensor, torch.Tensor]]
    test_sequences: List[Tuple[torch.Tensor, torch.Tensor]]
    # Stacked training inputs / targets built from train_sequences.
    inputs: torch.Tensor
    targets: torch.Tensor
    # Raw frame set by load_data.
    data: pd.DataFrame
    model: TimeSeriesPredictor

    def __init__(self, name: str, args: TrendArgs):
        """
        Store the name and arguments, build the predictor and optionally load it.

        :param name: ticker / file stem for this company.
        :param args: settings object; ``n_hidden``, ``seq_window``,
            ``n_layers``, ``dropout``, ``model_path`` and ``load_model`` are
            read here.
        """
        self.args = args
        self.name = name
        self.model = TimeSeriesPredictor(
            n_hidden=args.n_hidden,
            window=args.seq_window,
            n_layers=args.n_layers,
            dropout=args.dropout,
        )
        self.model_path = f"{args.model_path}/{name}.pt"
        if args.load_model:
            self.load_model()

    def load_model(self):
        """Load the state dict stored at ``self.model_path`` into the model."""
        # map_location="cpu" lets a checkpoint that was saved while the model
        # lived on a CUDA device be restored on a host without a GPU;
        # load_state_dict copies the values into the existing parameters.
        # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
        state_dict = torch.load(self.model_path, map_location="cpu")
        self.model.load_state_dict(state_dict)

    def save_model(self):
        """Write the model's state dict to ``self.model_path``."""
        torch.save(self.model.state_dict(), self.model_path)

    def load_data_from_yahoo(self) -> pd.DataFrame:
        """
        Download the last ``args.days`` days of data for ``self.name``.

        :return: the result of ``yf.download`` wrapped in a DataFrame.
        """
        today = dt.date.today()
        first_day = today - dt.timedelta(days=self.args.days)
        # Both bounds are passed to yf.download as YYYY-MM-DD strings.
        end = today.strftime("%Y-%m-%d")
        start = first_day.strftime("%Y-%m-%d")
        data = yf.download(self.name, start=start, end=end)
        return pd.DataFrame(data=data)

    def load_data(self):
        """
        Populate ``self.data`` from Yahoo or from the local CSV.

        When ``args.load_from_yahoo`` is set the data is downloaded and also
        written to ``<args.stock_dataset>/<name>.csv``; otherwise that CSV is
        read.
        """
        data_path = f"{self.args.stock_dataset}/{self.name}.csv"
        if self.args.load_from_yahoo:
            data = self.load_data_from_yahoo()
            data.to_csv(data_path)
        else:
            data = pd.read_csv(data_path)
        self.data = data

    def init_train_test_data(self):
        """
        Split the ``Close`` column, normalise it and build training tensors.

        The first ``ceil(len * args.split_n)`` values form the training part
        and the remainder the test part. Both are passed through ``norm_data``
        with the same scaler object and then windowed with
        ``create_inout_sequences`` using ``args.seq_window``.
        """
        self.scaler = MinMaxScaler(feature_range=(-1, 1))
        df = self.data
        data = np.array(df.Close.tolist())
        n = math.ceil(data.shape[0] * self.args.split_n)
        train_data, test_data = data[:n], data[n:]

        # NOTE(review): whether norm_data refits the scaler on each call is
        # not visible here -- confirm the test split does not refit it.
        self.train_data = norm_data(train_data, self.scaler)
        self.test_data = norm_data(test_data, self.scaler)
        self.train_sequences = create_inout_sequences(self.train_data, self.args.seq_window)
        self.test_sequences = create_inout_sequences(self.test_data, self.args.seq_window)

        inputs = []
        targets = []
        for seq, target in self.train_sequences:
            inputs.append(seq)
            # Extends with the *elements* of target (iteration over it), not
            # with target itself -- same effect as the in-place ``+=``.
            targets += target
        self.inputs = torch.stack(inputs)
        self.targets = torch.stack(targets)

    def test(self) -> float:
        """
        Run the model over the test sequences on CPU and average the loss.

        The recurrent state returned by each call is fed into the next one.

        :return: summed loss divided by ``len(self.test_data)``.
        """
        self.model.eval()
        self.model.cpu()
        total_loss = 0
        state = None
        for inputs, target in self.test_sequences:
            with torch.no_grad():
                _, state, loss = self.model(inputs.unsqueeze(0), state, target=target)
                total_loss += loss.item()
        # NOTE(review): the divisor is the number of test data points, not the
        # number of test sequences -- confirm this is the intended average.
        return total_loss / len(self.test_data)

    def predict(self, history: np.ndarray, n=1) -> np.ndarray:
        """
        Predict ``n`` further values after ``history`` by rolling the model.

        ``history`` is scaled to (-1, 1) with a scaler fitted on ``history``
        alone. Each step appends the model output to the window and drops the
        oldest value, so the window length stays constant.

        :param history: past values; flattened to a single window.
        :param n: number of steps to predict.
        :return: the last ``n`` window values mapped back to the original scale.
        """
        scaler = MinMaxScaler(feature_range=(-1, 1))
        x = scaler.fit_transform(history.reshape(-1, 1)).reshape(1, -1)
        # noinspection PyArgumentList
        x = torch.FloatTensor(x)
        state = None
        for _ in range(n):
            with torch.no_grad():
                y, state = self.model(x, state)
            # assumes y has a single column so it can be concatenated to the
            # column-shaped window -- TODO confirm against the model's output.
            x = torch.cat((x.view((-1, 1)), y)).view(1, -1)
            x = x[:, 1:]
        return scaler.inverse_transform(x.reshape(-1, 1).numpy())[-n:].reshape(-1)