def test_equations_preds(self, n_step: int):
    """
    Check that `n_step`-ahead predictions from a linear-model process line up with the observed series.

    A `KalmanFilter` with a single `LinearModel` process is given an initial mean of [1.5, -0.5], the same
    coefficients used to simulate `y` from the predictors `x1` and `x2`. Predictions are compared against `y`
    shifted by -2..2 steps: only the unshifted comparison should have a small mean squared residual.

    :param n_step: Passed through to the filter call as the `n_step` argument.
    """
    from pandas import DataFrame, Series
    from torch_kalman.utils.data import TimeSeriesDataset

    class LinearModelFixed(LinearModel):
        def __init__(self, *args, **kwargs):
            super().__init__(*args, **kwargs)
            # NOTE(review): presumably this keeps every state element out of the initial covariance,
            # so the coefficients stay at their initial mean -- confirm against LinearModel.
            self.no_icov_state_elements = self.state_elements

    lm_process = LinearModelFixed(id='lm', predictors=['x1', 'x2'])
    kf = KalmanFilter(processes=[lm_process], measures=['y'], compiled=False)
    kf.script_module._scale_by_measure_var = False

    # overwrite the relevant parameters in place:
    init_mean = kf.state_dict()['script_module.processes.lm.init_mean']
    init_mean[:] = torch.tensor([1.5, -0.5])
    measure_log_diag = kf.state_dict()['script_module.measure_covariance.cholesky_log_diag']
    measure_log_diag[0] = np.log(.1 ** .5)

    # simulate the data (draw order: x1, then x2, then the noise):
    num_times = 100
    x1 = np.random.randn(num_times)
    x2 = np.random.randn(num_times)
    df = DataFrame({'x1': x1, 'x2': x2})
    df['y'] = 1.5 * df['x1'] + -.5 * df['x2'] + .1 * np.random.randn(num_times)
    df['time'] = df.index.values
    df['group'] = '1'

    dataset = TimeSeriesDataset.from_dataframe(
        dataframe=df,
        group_colname='group',
        time_colname='time',
        dt_unit=None,
        X_colnames=['x1', 'x2'],
        y_colnames=['y']
    )
    y, X = dataset.tensors

    pred = kf(y, X=X, out_timesteps=X.shape[1], n_step=n_step)

    y_series = Series(y.squeeze().numpy())
    for shift in range(-2, 3):
        pred_series = Series(pred.means.squeeze().numpy())
        mse = ((y_series.shift(shift) - pred_series) ** 2).mean()
        if shift == 0:
            self.assertLess(mse, .02)
        else:
            # check there's no misalignment in internal n_step logic (i.e., realigning the input makes
            # things worse)
            self.assertGreater(mse, 1.)
# + {"hidePrompt": true, "cell_type": "markdown"} # #### Prepare our Dataset # # One of the key advantages of `torch-kalman` is the ability to train on a batch of time series, instead of training a separate model for each individually. The `TimeSeriesDataset` is similar to PyTorch's native `TensorDataset`, with some useful metadata on the batch of time series (the station names, the dates for each). # + # preprocess our measures of interest: measures = ['SO2', 'PM10'] measures_pp = [m + '_log10_scaled' for m in measures] df_aq_weekly[measures_pp] = np.log10(df_aq_weekly[measures] / col_means[measures]) # create a dataset: dataset_all = TimeSeriesDataset.from_dataframe(dataframe=df_aq_weekly, dt_unit='W', measure_colnames=measures_pp, group_colname='station', time_colname='date') # Train/Val split: dataset_train, dataset_val = dataset_all.train_val_split(dt=SPLIT_DT) dataset_train, dataset_val # - # #### Specify our Model # # The `KalmanFilter` subclasses `torch.nn.Module`. We specify the model by passing `processes` that capture the behaviors of our `measures`. processes = [] for measure in measures_pp: processes.extend([
def test_training3(self):
    """
    Test TBATS and TimeSeriesDataset integration.

    Builds ten groups from one 35-row daily template in which `y` follows the cosine for the first 12 rows and
    the sine afterwards; each group's dates are offset by a random 0-3 days. A `KalmanFilter` with a single
    period-7 `TBATS` process is fit with LBFGS, and the mean squared error between `actual` and `mean` in the
    prediction dataframe must be below .05.

    The test is skipped (rather than silently passing) when pandas is not installed.

    :raises RuntimeError: If training fails with a cholesky-related error on each of `MAX_TRIES` attempts (the
      last such error is attached as the cause); any other `RuntimeError` from training propagates unchanged.
    """
    try:
        import pandas as pd
    except ImportError:
        # not a package requirement; report a skip so the run doesn't show a pass for a test that never ran
        self.skipTest("pandas is not installed")

    torch.manual_seed(123)

    # five weeks of a period-7 signal (3.1415 is kept as-is so the generated data is unchanged):
    phase = 2. * 3.1415 * np.arange(0., 5 * 7.) / 7.
    template = pd.DataFrame({'sin': np.sin(phase), 'cos': np.cos(phase)})
    template['y'] = template['cos'].where(template.index < 12, other=template['sin'])

    # NOTE: the noisy `observed` column is generated here, but the dataset below is built from the noiseless `y`.
    df = pd.concat([
        template.assign(
            observed=lambda d: d['y'] + np.random.normal(scale=.2, size=len(d.index)),
            group=str(i + 1),
            time=lambda d: np.array(d.index.tolist(), dtype='datetime64[D]') + np.random.randint(low=0, high=4)
        )
        for i in range(10)
    ])

    dataset = TimeSeriesDataset.from_dataframe(
        df,
        group_colname='group',
        time_colname='time',
        dt_unit='D',
        measure_colnames=['y']
    )

    def _train(num_epochs: int = 15):
        kf = KalmanFilter(
            processes=[
                TBATS(id='day_of_week', period=7, dt_unit='D', K=1, process_variance=True, decay=(.85, 1.))
            ],
            measures=['y']
        )

        # train:
        optimizer = torch.optim.LBFGS(kf.parameters(), lr=.15, max_iter=10)

        def closure():
            optimizer.zero_grad()
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                pred = kf(dataset.tensors[0], start_datetimes=dataset.start_datetimes)
            loss = -pred.log_prob(dataset.tensors[0]).mean()
            loss.backward()
            return loss

        print(f"\nTraining for {num_epochs} epochs...")
        for _ in range(num_epochs):
            loss = optimizer.step(closure)
            print("loss:", loss.item())

        return kf

    # training can fail with a cholesky error; retry a bounded number of times:
    kf = None
    last_error = None
    for _ in range(MAX_TRIES):
        try:
            kf = _train()
        except RuntimeError as err:
            if 'cholesky' not in str(err):
                raise
            last_error = err
        else:
            break
    if kf is None:
        # keep the underlying failure visible in the traceback
        raise RuntimeError("MAX_TRIES exceeded") from last_error

    with torch.no_grad():
        pred = kf(dataset.tensors[0], start_datetimes=dataset.start_datetimes)
    df_pred = pred.to_dataframe(dataset)

    mse = np.mean((df_pred['actual'] - df_pred['mean']) ** 2)
    self.assertLess(mse, .05)