Ejemplo n.º 1
0
    def test_equations_preds(self, n_step: int):
        """
        Check that predictions stay aligned with the observations for ``n_step``.

        A linear-model process is initialised with the same coefficients
        (1.5, -0.5) that are used to simulate ``y`` from ``x1`` and ``x2``.
        The squared error between ``y`` and the predicted means must be small
        when the two are compared as-is, and large when ``y`` is shifted by
        one or two steps in either direction.

        :param n_step: forwarded to the filter call as ``n_step``.
        """
        from torch_kalman.utils.data import TimeSeriesDataset
        from pandas import DataFrame, Series

        class LinearModelFixed(LinearModel):
            def __init__(self, *args, **kwargs):
                super().__init__(*args, **kwargs)
                # presumably keeps every state element out of the initial
                # covariance -- TODO confirm against LinearModel
                self.no_icov_state_elements = self.state_elements

        kf = KalmanFilter(
            processes=[LinearModelFixed(id='lm', predictors=['x1', 'x2'])],
            measures=['y'],
            compiled=False,
        )
        kf.script_module._scale_by_measure_var = False

        # overwrite the parameters in place via the tensors in the state-dict:
        init_mean = kf.state_dict()['script_module.processes.lm.init_mean']
        init_mean[:] = torch.tensor([1.5, -0.5])
        meas_log_diag = kf.state_dict()[
            'script_module.measure_covariance.cholesky_log_diag']
        meas_log_diag[0] = np.log(.1**.5)

        # simulate y = 1.5 * x1 - 0.5 * x2 + noise (draw order: x1, x2, noise):
        num_times = 100
        x1 = np.random.randn(num_times)
        x2 = np.random.randn(num_times)
        df = DataFrame({'x1': x1, 'x2': x2})
        df['y'] = (1.5 * df['x1'] + -.5 * df['x2'] +
                   .1 * np.random.randn(num_times))
        df['time'] = df.index.values
        df['group'] = '1'

        dataset = TimeSeriesDataset.from_dataframe(
            dataframe=df,
            group_colname='group',
            time_colname='time',
            dt_unit=None,
            X_colnames=['x1', 'x2'],
            y_colnames=['y'],
        )
        y, X = dataset.tensors

        pred = kf(y, X=X, out_timesteps=X.shape[1], n_step=n_step)
        observed = Series(y.squeeze().numpy())

        for lag in range(-2, 3):
            predicted = Series(pred.means.squeeze().numpy())
            mse = ((observed.shift(lag) - predicted)**2).mean()
            if lag == 0:
                self.assertLess(mse, .02)
            else:
                # check there's no misalignment in internal n_step logic
                # (i.e., realigning the input makes things worse)
                self.assertGreater(mse, 1.)
Ejemplo n.º 2
0
# + {"hidePrompt": true, "cell_type": "markdown"}
# #### Prepare our Dataset
#
# One of the key advantages of `torch-kalman` is the ability to train on a batch of time series, instead of training a separate model for each individually. The `TimeSeriesDataset` is similar to PyTorch's native `TensorDataset`, with some useful metadata on the batch of time series (the station names, the dates for each).

# +
# Measures of interest; each is stored as log10 of the raw value divided by
# its entry in `col_means`, under a `*_log10_scaled` column:
measures = ['SO2', 'PM10']
measures_pp = [f'{m}_log10_scaled' for m in measures]
df_aq_weekly[measures_pp] = np.log10(
    df_aq_weekly[measures] / col_means[measures]
)

# Build a single dataset from the frame, grouped by station, weekly steps:
dataset_all = TimeSeriesDataset.from_dataframe(
    dataframe=df_aq_weekly,
    group_colname='station',
    time_colname='date',
    dt_unit='W',
    measure_colnames=measures_pp,
)

# Split into training and validation datasets at SPLIT_DT:
dataset_train, dataset_val = dataset_all.train_val_split(dt=SPLIT_DT)
dataset_train, dataset_val
# -

# #### Specify our Model
#
# The `KalmanFilter` subclasses `torch.nn.Module`. We specify the model by passing `processes` that capture the behaviors of our `measures`.

processes = []
for measure in measures_pp:
    processes.extend([
Ejemplo n.º 3
0
    def test_training3(self):
        """
        Test TBATS and TimeSeriesDataset integration.

        Builds ten groups from a 35-step daily series with a period of 7 that
        switches from cosine to sine at index 12, gives each group a random
        start offset of 0-3 days, fits a `KalmanFilter` with a single `TBATS`
        process using LBFGS, and asserts that the mean squared error between
        the `actual` and `mean` columns of the predictions is below .05.

        The test is skipped when pandas is not installed. Training is retried
        up to `MAX_TRIES` times when it fails with a `RuntimeError` whose
        message mentions 'cholesky'; any other `RuntimeError` propagates.

        :raises RuntimeError: if no training attempt succeeds.
        """
        try:
            import pandas as pd
        except ImportError:  # not a package requirement
            # report a skip; a bare `return` would be counted as a pass
            self.skipTest("pandas is not installed")
        torch.manual_seed(123)
        # NOTE: only torch is seeded; the numpy draws below are unseeded.

        phase = 2. * 3.1415 * np.arange(0., 5 * 7.) / 7.
        df = pd.DataFrame({'sin': np.sin(phase), 'cos': np.cos(phase)})
        # cosine for the first 12 steps, sine afterwards:
        df['y'] = df['cos'].where(df.index < 12, other=df['sin'])

        # One copy of the frame per group. A noisy `observed` column is added,
        # but the dataset below is built from the noise-free `y` column.
        # NOTE(review): confirm that `y` (not `observed`) is intended.
        df = pd.concat([
            df.assign(
                observed=lambda d: (d['y'] + np.random.normal(
                    scale=.2, size=len(d.index))),
                group=str(i + 1),
                time=lambda d: (np.array(d.index.tolist(),
                                         dtype='datetime64[D]') +
                                np.random.randint(low=0, high=4)),
            ) for i in range(10)
        ])
        dataset = TimeSeriesDataset.from_dataframe(df,
                                                   group_colname='group',
                                                   time_colname='time',
                                                   dt_unit='D',
                                                   measure_colnames=['y'])

        def _train(num_epochs: int = 15):
            """Fit a fresh filter on `dataset` and return it."""
            kf = KalmanFilter(
                processes=[
                    TBATS(id='day_of_week',
                          period=7,
                          dt_unit='D',
                          K=1,
                          process_variance=True,
                          decay=(.85, 1.))
                ],
                measures=['y'],
            )

            # train:
            optimizer = torch.optim.LBFGS(kf.parameters(), lr=.15, max_iter=10)

            def closure():
                optimizer.zero_grad()
                with warnings.catch_warnings():
                    warnings.simplefilter("ignore")
                    pred = kf(dataset.tensors[0],
                              start_datetimes=dataset.start_datetimes)
                loss = -pred.log_prob(dataset.tensors[0]).mean()
                loss.backward()
                return loss

            print(f"\nTraining for {num_epochs} epochs...")
            for _ in range(num_epochs):
                loss = optimizer.step(closure)
                print("loss:", loss.item())

            return kf

        kf = None
        last_error = None
        for _ in range(MAX_TRIES):
            try:
                kf = _train()
            except RuntimeError as err:
                if 'cholesky' not in str(err):
                    raise
                # cholesky failure: remember it and try again
                last_error = err
            else:
                break
        if kf is None:
            # chain the last cholesky failure so the cause is not lost
            raise RuntimeError("MAX_TRIES exceeded") from last_error

        with torch.no_grad():
            pred = kf(dataset.tensors[0],
                      start_datetimes=dataset.start_datetimes)
        df_pred = pred.to_dataframe(dataset)
        self.assertLess(np.mean((df_pred['actual'] - df_pred['mean'])**2), .05)