예제 #1
0
 def test_single_batch(self, use_tensor_extension):
     """Check the single batch built from nine periods of one id.

     Asserts that the generator has length one, that its only batch is a
     ``pd.DataFrame`` containing the id, plain and ``seq_``-prefixed
     columns, and that the second axis of every prefixed column's values
     equals the configured sequence length.
     """
     window = 2
     frame = mock_fit_data(
         periods=9, ids=[0], use_tensor_extension=use_tensor_extension
     )
     generator = SequenceForecastBatchGenerator(
         df=frame,
         batch_size=4,
         sequence_length=window,
         id_column='id',
         sequence_columns=[ini.Columns.datetime, ini.Columns.target],
         sequence_prefix='seq_',
         last_step_columns=[],
         forecast_steps_min=1,
         forecast_steps_max=1,
     )
     assert len(generator) == 1

     batch = generator[0]
     assert isinstance(batch, pd.DataFrame)

     plain_names = ['id', ini.Columns.datetime, ini.Columns.target]
     prefixed_names = [
         f'seq_{ini.Columns.datetime}', f'seq_{ini.Columns.target}'
     ]
     for name in plain_names + prefixed_names:
         assert name in batch

     # Axis 1 of each sequenced column holds the steps of one window.
     for name in prefixed_names:
         window_values = batch[name].values
         assert window_values.shape[1] == window
예제 #2
0
 def test_single_batch_with_last_step(self):
     """Check a single batch that also carries a last-step column.

     Besides the checks on the ``seq_``-prefixed columns (second axis
     equals the sequence length), the ``last_step_``-prefixed column must
     be present and its values must be one-dimensional.
     """
     window = 2
     frame = mock_fit_data(periods=9, ids=[0])
     generator = SequenceForecastBatchGenerator(
         df=frame,
         batch_size=4,
         sequence_length=window,
         id_column='id',
         sequence_columns=[ini.Columns.datetime, ini.Columns.target],
         sequence_prefix='seq_',
         last_step_columns=[ini.Columns.datetime],
         last_step_prefix='last_step_',
         forecast_steps_min=1,
         forecast_steps_max=1,
     )
     assert len(generator) == 1

     batch = generator[0]
     assert isinstance(batch, pd.DataFrame)

     plain_names = ['id', ini.Columns.datetime, ini.Columns.target]
     prefixed_names = [
         f'seq_{ini.Columns.datetime}', f'seq_{ini.Columns.target}'
     ]
     last_step_names = [f'last_step_{ini.Columns.datetime}']
     for name in plain_names + prefixed_names + last_step_names:
         assert name in batch

     for name in prefixed_names:
         assert batch[name].values.shape[1] == window

     # Last-step columns hold one value per row, hence a 1-D array.
     for name in last_step_names:
         assert len(batch[name].values.shape) == 1
예제 #3
0
 def test_random_offset(self, random):
     """Expect two reads of batch 0 to differ when batch_offset is on.

     The test passes when comparing two successive ``generator[0]`` reads
     with ``assert_array_equal`` raises ``AssertionError``.
     """
     generator = SequenceForecastBatchGenerator(
         df=mock_fit_data(periods=101, ids=[0]),
         batch_offset=True,
         sequence_length=10,
     )
     with pytest.raises(AssertionError):
         first_read = generator[0]
         second_read = generator[0]
         assert_array_equal(first_read, second_read)
예제 #4
0
def test_featurize_is_in_interval(featurizer, periods):
    """Check the ``is_in_interval`` attribute for a 17:00-19:00 window.

    The sum of the resulting ``is_in_interval`` column must equal
    ``4 * len(df) / 48`` of the transformed frame.
    """
    interval = {
        'start_time': '17:00',
        'end_time': '19:00',
    }
    frame = mock_fit_data(
        start_date=datetime.datetime(2017, 1, 1, 0, 0),
        periods=periods,
    )
    featurizer.set_params(
        column=datetime_column,
        attributes='is_in_interval',
        kwargs=interval,
    )
    transformed = featurizer.transform(frame)
    # presumably 48 rows per day, 4 of which fall in the window -- confirm
    flagged = np.sum(transformed['is_in_interval'])
    assert flagged == 4 * len(transformed) / 48
예제 #5
0
def test_get_time_is_in_interval_from_series(periods):
    """Check interval membership for a window and its complement.

    The 17:00-19:00 result must have one entry per period, and the
    reversed 19:00-17:00 window must sum to ``44 * len(df) / 48``.
    """
    frame = mock_fit_data(
        start_date=datetime.datetime(2017, 1, 1, 0, 0),
        periods=periods,
    )
    is_peak = get_time_is_in_interval_from_series(
        start_time='17:00',
        end_time='19:00',
        series=frame[datetime_column],
    )
    # Swapping the bounds asks for the interval that wraps past midnight.
    not_peak = get_time_is_in_interval_from_series(
        start_time='19:00',
        end_time='17:00',
        series=frame[datetime_column],
    )
    assert len(is_peak) == periods
    outside_count = np.sum(not_peak)
    assert outside_count == 44 * len(frame) / 48
예제 #6
0
 def test_get_sequence_values(self):
     """Check the shape returned by ``_get_sequence_values``.

     For three start indices and a sequence length of two the result must
     have shape ``(3, 2)``.
     """
     sequence_length = 2
     frame = mock_fit_data(periods=10, ids=[0])
     gen = SamplingForecastBatchGenerator(
         df=frame,
         sequence_length=sequence_length,
     )
     start_indices = np.array([0, 3, 1])
     seq_values = gen._get_sequence_values(ini.Columns.id, start_indices)
     expected_shape = (len(start_indices), sequence_length)
     assert seq_values.shape == expected_shape
예제 #7
0
 def test_random_offset_value_with_period(self, random, seq_len, period,
                                          expected_max_offset):
     """Check the range and period alignment of ``random_offset_value``.

     Over 100 reads the smallest offset must be 0, the largest must equal
     ``expected_max_offset`` and every offset must be a multiple of
     ``period``.
     """
     generator = SequenceForecastBatchGenerator(
         df=mock_fit_data(periods=101, ids=[0]),
         sequence_length=seq_len,
         batch_offset=True,
         batch_offset_period=period,
     )
     offsets = []
     for _ in range(100):
         offsets.append(generator.random_offset_value)

     assert min(offsets) == 0
     assert max(offsets) == expected_max_offset
     remainders = (offset % period for offset in offsets)
     assert all(remainder == 0 for remainder in remainders)
예제 #8
0
 def test_subgen_lengths(self, n_customers, batch_size, exp_sg_len):
     """Check that every sub-generator reports the expected length.

     Builds three periods of mock data for ``n_customers`` ids and asserts
     that each entry of ``generator.subgen_lengths`` equals ``exp_sg_len``.

     Args:
         n_customers: Number of ids to generate data for.
         batch_size: Batch size handed to the generator.
         exp_sg_len: Length every sub-generator is expected to have.
     """
     # The ``n_customers`` argument is used as passed in; it was previously
     # overwritten with a hard-coded 3, which made the argument dead.
     ids = np.arange(n_customers)
     df = mock_fit_data(periods=3, ids=ids)
     generator = SequenceForecastBatchGenerator(
         df=df,
         sequence_length=1,
         forecast_steps_max=1,
         batch_size=batch_size,
     )
     assert all(sgl == exp_sg_len for sgl in generator.subgen_lengths)
예제 #9
0
 def test_n_batches(self, n_points, seq_length, fc_max, batch_size,
                    n_batches_expected):
     """Check ``len(generator)`` against the expected number of batches."""
     frame = mock_fit_data(periods=n_points, ids=[0])
     settings = dict(
         batch_size=batch_size,
         sequence_length=seq_length,
         forecast_steps_min=1,
         forecast_steps_max=fc_max,
     )
     generator = SequenceForecastBatchGenerator(df=frame, **settings)
     n_batches = len(generator)
     assert n_batches == n_batches_expected
예제 #10
0
 def test_n_subgens(self, n_customers):
     """Check the per-id bookkeeping sizes of the generator.

     ``chunks`` and ``subgen_lengths`` must each have ``n_customers``
     entries and ``subgen_index_bounds`` must have one more.
     """
     generator = SequenceForecastBatchGenerator(
         df=mock_fit_data(periods=4, ids=np.arange(n_customers)),
         sequence_length=2,
         forecast_steps_max=1,
         batch_size=2**10,
     )
     n_chunks = len(generator.chunks)
     n_lengths = len(generator.subgen_lengths)
     n_bounds = len(generator.subgen_index_bounds)
     assert n_chunks == n_customers
     assert n_lengths == n_customers
     assert n_bounds == n_customers + 1
     assert len(generator.subgen_index_bounds) == n_customers + 1
예제 #11
0
 def test_invalid_start_time(self):
     """Expect ``ValueError`` for a start time one minute off the data.

     The start time is the first timestamp of the time-sorted frame plus
     one minute; reading the first batch with it must raise
     ``ValueError``.
     """
     df = mock_fit_data(periods=1344, ids=[0])
     df = df.sort_values(by=[ini.Columns.datetime])
     # ``.iloc[0]`` picks the first row of the sorted frame by position;
     # plain ``[0]`` would look up the index label 0 instead.
     first_timestamp = df[ini.Columns.datetime].iloc[0]
     start_time = (first_timestamp + pd.Timedelta(1, unit='m')).time()
     generator = SequenceForecastBatchGenerator(
         df=df,
         sequence_length=48,
         sequence_columns=[ini.Columns.datetime],
         start_time=start_time)
     with pytest.raises(ValueError):
         generator[0]
예제 #12
0
 def test_aggregate_ids(self):
     """Check batch count and size with ``batch_aggregator=2``.

     For two ids with three periods each, the generator must have length
     one and its only batch must have length two.
     """
     customer_ids = np.arange(2)
     frame = mock_fit_data(periods=3, ids=customer_ids)
     generator = SequenceForecastBatchGenerator(
         df=frame,
         sequence_length=2,
         forecast_steps_max=1,
         batch_size=2,
         batch_aggregator=2,
     )
     assert len(generator) == 1
     only_batch = generator[0]
     assert len(only_batch) == 2
예제 #13
0
def test_get_is_morning_peak_from_series(featurizer, periods):
    """Check the peak, daytime and morning-peak flag columns.

    Each requested attribute must appear as a column of the transformed
    frame, and its sum must equal the listed count per 48 rows.
    """
    # Expected number of flagged rows out of every 48, per attribute.
    expected_per_48 = {
        'is_peak': 6,
        'is_daytime': 34,
        'is_morningpeak': 10,
    }
    frame = mock_fit_data(
        start_date=datetime.datetime(2017, 1, 1, 0, 0),
        periods=periods,
    )
    featurizer.set_params(
        column=datetime_column,
        attributes=list(expected_per_48),
    )
    transformed = featurizer.transform(frame)
    for attribute in expected_per_48:
        assert attribute in transformed
    for attribute, count in expected_per_48.items():
        flagged = np.sum(transformed[attribute])
        assert flagged == count * len(transformed) / 48
예제 #14
0
 def test_find_batch_raises_outside_subgens(self):
     """Expect ``IndexError`` when looking up batch index ``2**10``.

     The generator is built from three ids with three periods each.
     """
     customer_ids = np.arange(3)
     generator = SequenceForecastBatchGenerator(
         df=mock_fit_data(periods=3, ids=customer_ids),
         sequence_length=1,
         forecast_steps_max=1,
         batch_size=2**10,
     )
     out_of_range_idx = 2**10
     with pytest.raises(IndexError):
         generator.find_subbatch_in_subgens(out_of_range_idx)
예제 #15
0
    def test_num_examples(self, n_points, seq_length, fc_max,
                          n_sequences_expected):
        """Check ``num_examples`` with and without ``batch_offset``.

        Without the offset the count must equal ``n_sequences_expected``;
        after setting ``batch_offset = True`` it must be one less, but
        never below zero.
        """
        generator = SequenceForecastBatchGenerator(
            df=mock_fit_data(periods=n_points, ids=[0]),
            sequence_length=seq_length,
            forecast_steps_min=1,
            forecast_steps_max=fc_max,
        )
        assert generator.num_examples == n_sequences_expected

        generator.batch_offset = True
        expected_with_offset = n_sequences_expected - 1
        if expected_with_offset < 0:
            expected_with_offset = 0
        assert generator.num_examples == expected_with_offset
예제 #16
0
 def test_find_batch_in_subgens(self, batch_size, batch_idx, exp_subgen_idx,
                                exp_idx_in_subgen):
     """Check how a batch index maps onto a sub-generator.

     ``find_subbatch_in_subgens(batch_idx)`` must return the pair
     ``(exp_subgen_idx, exp_idx_in_subgen)`` for a generator built from
     three ids with three periods each.
     """
     customer_ids = np.arange(3)
     generator = SequenceForecastBatchGenerator(
         df=mock_fit_data(periods=3, ids=customer_ids),
         sequence_length=1,
         forecast_steps_max=1,
         batch_size=batch_size,
     )
     location = generator.find_subbatch_in_subgens(batch_idx)
     subgen_idx, idx_in_subgen = location
     assert subgen_idx == exp_subgen_idx
     assert idx_in_subgen == exp_idx_in_subgen
예제 #17
0
 def test_batch_size(self, n_points, seq_length, fc_max, batch_size,
                     expected_last_batch_size):
     """Check the length of every batch, including the final one.

     All batches except the last must have ``batch_size`` rows; the batch
     at index ``-1`` must have ``expected_last_batch_size`` rows.
     """
     frame = mock_fit_data(periods=n_points, ids=[0])
     generator = SequenceForecastBatchGenerator(
         df=frame,
         batch_size=batch_size,
         sequence_columns=[ini.Columns.target],
         last_step_columns=[],
         sequence_length=seq_length,
         forecast_steps_min=1,
         forecast_steps_max=fc_max,
     )
     n_full_batches = len(generator) - 1
     for position in range(n_full_batches):
         full_batch = generator[position]
         assert len(full_batch) == batch_size
     last_batch = generator[-1]
     assert len(last_batch) == expected_last_batch_size
예제 #18
0
 def test_n_batches_with_offset(
     self, n_points, seq_length, fc_max, batch_size, n_batches_expected,
     use_tensor_extension
 ):
     """Check ``len(generator)`` when ``batch_offset`` is enabled."""
     frame = mock_fit_data(
         periods=n_points,
         ids=[0],
         use_tensor_extension=use_tensor_extension
     )
     settings = dict(
         batch_size=batch_size,
         sequence_length=seq_length,
         forecast_steps_min=1,
         forecast_steps_max=fc_max,
         batch_offset=True,
     )
     generator = SequenceForecastBatchGenerator(df=frame, **settings)
     n_batches = len(generator)
     assert n_batches == n_batches_expected
예제 #19
0
 def test_start_time(self, start_time_idx, expected_start_time_idx):
     """Check the time of day at which the first sequence starts.

     Args:
         start_time_idx: Row position in the time-sorted frame whose time
             of day is passed as ``start_time``; ``None`` passes
             ``start_time=None``.
         expected_start_time_idx: Row position in the time-sorted frame
             whose time of day the first sequence element must match.
     """
     df = mock_fit_data(periods=1344, ids=[0])
     df = df.sort_values(by=[ini.Columns.datetime])
     # ``.iloc`` addresses rows of the sorted frame by position; plain
     # ``[idx]`` would look up the index label ``idx`` instead.
     timestamps = df[ini.Columns.datetime]
     if start_time_idx is None:
         start_time = None
     else:
         start_time = timestamps.iloc[start_time_idx].time()
     expected_start_time = timestamps.iloc[expected_start_time_idx].time()
     generator = SequenceForecastBatchGenerator(
         df=df,
         sequence_length=48,
         sequence_columns=[ini.Columns.datetime],
         batch_offset=False,
         start_time=start_time)
     batch = generator[0]
     # NOTE(review): the batch's index is not visible here, so the lookup
     # into the batch is left unchanged -- confirm label 0 is the first row.
     actual_start_time = batch[f'seq_{ini.Columns.datetime}'][0][0].time()
     assert actual_start_time == expected_start_time
예제 #20
0
def indexed_df():
    """Return mock fit data built with ``index=True``."""
    frame = mock_fit_data(index=True)
    return frame
예제 #21
0
def df():
    """Return mock fit data with ``N_TIMES`` periods for ``N_IDS`` ids."""
    customer_ids = np.arange(N_IDS)
    frame = mock_fit_data(periods=N_TIMES, ids=customer_ids)
    return frame
예제 #22
0
def df(use_tensor_extension):
    """Return mock fit data, forwarding ``use_tensor_extension``."""
    frame = mock_fit_data(use_tensor_extension=use_tensor_extension)
    return frame
예제 #23
0
def df():
    """Return mock fit data starting at 2017-01-01 01:00."""
    first_timestamp = datetime.datetime(2017, 1, 1, 1, 0)
    frame = mock_fit_data(start_date=first_timestamp)
    return frame
예제 #24
0
def validation_df():
    """Return mock fit data with 13 periods for use as validation data."""
    frame = mock_fit_data(periods=13)
    return frame
예제 #25
0
def df():
    """Return mock fit data with 13 periods."""
    frame = mock_fit_data(periods=13)
    return frame
예제 #26
0
def df():
    """Return mock fit data built with default arguments."""
    frame = mock_fit_data()
    return frame