Beispiel #1
0
def simulate_daily_series(num_groups: int, num_timesteps: int, noise: float = 1.0) -> 'DataFrame':
    """
    Simulate daily series for several groups and return them as a single dataframe.

    The series are produced by ``_simulate`` (with ``dt_unit='D'``), wrapped in a ``TimeSeriesDataset`` that has one
    measure ``'y'`` and starts every group at ``DEFAULT_START_DT``, and then flattened via ``to_dataframe()``. Two
    standard-normal placeholder predictors (``X1``, ``X2``) are added, and every group is cut off at a randomly drawn
    timestep so that the groups end up with different lengths.

    :param num_groups: Number of groups to simulate; groups are named ``0 .. num_groups - 1``.
    :param num_timesteps: Number of timesteps simulated per group before truncation.
    :param noise: Forwarded to ``_simulate`` as its ``noise`` argument.
    :return: The truncated dataframe, including the ``X1``/``X2`` columns, with a fresh 0-based index.
    """
    simulated = _simulate(num_groups, num_timesteps, noise=noise, dt_unit='D')

    # Wrap the raw simulation in a dataset and flatten it straight into a dataframe.
    frame = TimeSeriesDataset(
        simulated,
        group_names=range(num_groups),
        start_times=[DEFAULT_START_DT] * num_groups,
        measures=[['y']],
        dt_unit='D'
    ).to_dataframe()

    # TODO: meaningful predictors (for now both are pure noise).
    num_rows = len(frame.index)
    frame['X1'] = np.random.normal(size=num_rows)
    frame['X2'] = np.random.normal(size=num_rows)

    # Per group, draw the last timestep offset to keep: an integer from int(0.8 * num_timesteps)
    # up to num_timesteps - 1 (sampled with replacement, so groups may share a cutoff).
    earliest_cutoff = int(num_timesteps * .80)
    cutoffs = np.random.choice(range(earliest_cutoff, num_timesteps), size=num_groups, replace=True)
    cutoff_by_group = dict(enumerate(cutoffs))

    # Keep only the rows at or before each group's cutoff time; relies on the dataframe
    # exposing 'group' and 'time' columns.
    last_time = DEFAULT_START_DT + frame['group'].map(cutoff_by_group)
    keep = frame['time'] <= last_time
    return frame.loc[keep, :].reset_index(drop=True)
Beispiel #2
0
def simulate_daily_series(num_groups: int, num_timesteps: int, noise: float = 1.0) -> 'DataFrame':
    """
    Simulate daily series for several groups and return them as a single dataframe.

    A season specification (start ``2007-01-01``, unit ``'D'``) is handed to ``_simulate``; the result is wrapped in a
    ``TimeSeriesDataset`` with one measure ``'y'`` whose groups all start at the season start, and then flattened via
    ``to_dataframe()``. Two standard-normal placeholder predictors (``X1``, ``X2``) are added, and each group is
    truncated at a randomly drawn timestep so that group lengths are non-uniform.

    :param num_groups: Number of groups to simulate; groups are named ``0 .. num_groups - 1``.
    :param num_timesteps: Number of timesteps simulated per group before truncation.
    :param noise: Forwarded to ``_simulate`` as its ``noise`` argument.
    :return: The truncated dataframe, including the ``X1``/``X2`` columns, with a fresh 0-based index.
    """
    season_start = np.datetime64('2007-01-01')  # arbitrary monday at midnight
    season_spec = {'season_start': season_start, 'dt_unit': 'D'}

    simulated = _simulate(num_groups, num_timesteps, season_spec, noise=noise)

    # Wrap the raw simulation in a dataset and flatten it straight into a dataframe.
    frame = TimeSeriesDataset(
        simulated,
        group_names=range(num_groups),
        start_times=[season_start] * num_groups,
        measures=[['y']],
        dt_unit=season_spec['dt_unit']
    ).to_dataframe()

    # TODO: meaningful predictors (for now both are pure noise).
    num_rows = len(frame.index)
    frame['X1'] = np.random.normal(size=num_rows)
    frame['X2'] = np.random.normal(size=num_rows)

    # Per group, draw the last timestep offset to keep: an integer from int(0.8 * num_timesteps)
    # up to num_timesteps - 1 (sampled with replacement, so groups may share a cutoff).
    earliest_cutoff = int(num_timesteps * .80)
    cutoffs = np.random.choice(range(earliest_cutoff, num_timesteps), size=num_groups, replace=True)
    cutoff_by_group = dict(enumerate(cutoffs))

    # Keep only the rows at or before each group's cutoff time; relies on the dataframe
    # exposing 'group' and 'time' columns.
    last_time = season_start + frame['group'].map(cutoff_by_group)
    keep = frame['time'] <= last_time
    return frame.loc[keep, :].reset_index(drop=True)
Beispiel #3
0
    def test_time_series_dataset(self):
        """
        Check that ``TimeSeriesDataset.to_dataframe`` matches a dataframe assembled by hand.

        A random tensor of shape ``(3, 39, 2)`` is wrapped in a dataset with three named groups and the measures
        ``y1``/``y2``. The expected dataframe is built group by group from the raw tensor and compared elementwise
        with the output of ``to_dataframe()``. If pandas cannot be imported, a warning is emitted and the test
        returns without asserting anything.
        """
        tens = torch.randn((3, 39, 2))

        dataset = TimeSeriesDataset(
            tens,
            group_names=['one', 'two', 'three'],
            start_times=[0, 0, 0],
            measures=[['y1', 'y2']],
            dt_unit=None
        )

        # pandas is optional: without it there is nothing to compare against.
        try:
            import pandas as pd
        except ImportError:
            warn("Not testing TimeSeriesDataset.to_dataframe, pandas not installed.")
            return

        actual = dataset.to_dataframe()

        # Rebuild the expected frame one group at a time: the group's slice of the tensor as measure
        # columns, plus the group name and the first entry of `dataset.times()` as the time column.
        per_group = []
        for idx, name in enumerate(dataset.group_names):
            group_frame = pd.DataFrame(tens[idx].numpy(), columns=dataset.all_measures)
            per_group.append(group_frame.assign(group=name, time=dataset.times()[0]))
        expected = pd.concat(per_group)

        # Every cell of every column must agree.
        matches = actual == expected
        self.assertTrue(matches.all().all())