예제 #1
0
def test_all_groups_missing_raises(input_df, errors):
    """Check that ``transform`` raises ``KeyError`` when no group is known.

    The pipeline is fitted on ``input_df`` and then asked to transform a
    frame built with ``ids=[2, 3]`` (presumably none of which were present
    at fit time -- confirm against the ``input_df`` fixture).

    Args:
        input_df: Frame used to fit the grouped pipeline.
        errors: Value forwarded to ``GroupedPipeline(errors=...)``.
    """
    transform_df = mock.mock_raw_data(ids=[2, 3])
    gp = GroupedPipeline(groupby=['id'], pipeline=Pipeline([col_selector]),
                         errors=errors)
    gp.fit(input_df)
    with pytest.raises(KeyError):
        gp.transform(transform_df)
        # ``pytest.raises(message=...)`` was deprecated in pytest 4.1 and
        # removed in 5.0.  Calling ``pytest.fail`` after the statement under
        # test is the documented replacement: it is only reached when no
        # KeyError was raised and reports the same failure text.
        pytest.fail('All keys missing in fitted pipelines')
예제 #2
0
 def test_nb_batches(self, n_points, batch_size, batch_aggregator,
                     expected_nb_batches):
     """Check the number of batches reported by ``RowBatchGenerator``.

     Mock data is generated for two ids with ``n_points`` periods, wrapped
     in a generator over the target column, and ``len(generator)`` is
     compared with ``expected_nb_batches``.
     """
     customer_ids = np.arange(2)
     raw = mock_raw_data(periods=n_points, ids=customer_ids)
     batches = RowBatchGenerator(
         df=raw,
         batch_size=batch_size,
         columns=[ini.Columns.target],
         batch_aggregator=batch_aggregator,
     )
     n_batches = len(batches)
     assert n_batches == expected_nb_batches
예제 #3
0
def test_one_group_missing_return_none(input_df):
    """Check ``errors='return_empty'`` output when transforming ids 0, 1, 2.

    The output must have a single column whose first 96 rows equal the
    first 96 target values of the transformed frame, and every row from
    index 96 onwards must be NaN.
    """
    transform_df = mock.mock_raw_data(ids=[0, 1, 2])
    selector_pipeline = Pipeline([val_selector])
    gp = GroupedPipeline(groupby=['id'], pipeline=selector_pipeline,
                         errors='return_empty')
    gp.fit(input_df)
    out = gp.transform(transform_df)

    n_columns = out.shape[1]
    assert n_columns == 1

    # Rows [0, 96) are expected to pass through unchanged (presumably the
    # rows of the groups seen during fit -- confirm against the mock data).
    expected_head = transform_df[ini.Columns.target].values[:96]
    actual_head = out[:96, 0]
    np.testing.assert_array_equal(actual_head, expected_head)

    tail = out[96:]
    assert np.isnan(tail).all()
예제 #4
0
def test_one_groups_missing_return_df(input_df):
    """Check ``errors='return_df'`` output when transforming ids 0, 1, 2.

    The output must contain the original columns plus ``month``. ``month``
    must be populated for ids 0 and 1 and null for id 2, and the original
    columns must be returned unchanged.

    Args:
        input_df: Frame used to fit the grouped pipeline.
    """
    transform_df = mock.mock_raw_data(ids=[0, 1, 2])

    dt_feat = PandasDateTimeFeaturizer(attributes='month')
    gp = GroupedPipeline(groupby=['id'], pipeline=dt_feat, errors='return_df')
    gp.fit(input_df)
    out = gp.transform(transform_df)
    # This comparison used to be a bare expression whose result was
    # discarded, so the column check never ran; it is now asserted.
    assert set(out.columns) == {
        'id', ini.Columns.datetime, ini.Columns.target, 'month'
    }
    assert (~out[out.id == 0].month.isnull()).all()
    assert (~out[out.id == 1].month.isnull()).all()
    assert (out[out.id == 2].month.isnull()).all()

    # The pre-existing columns must be returned untouched.
    orig_cols = ['id', ini.Columns.datetime, ini.Columns.target]
    pd.testing.assert_frame_equal(out[orig_cols], transform_df)
예제 #5
0
 def test_columns(self, id_column, sequence_columns, last_step_columns,
                  expected_columns):
     """Check the column names of the first generated batch.

     Tuple column labels are reduced to their first element before the
     set of names is compared with ``expected_columns``.
     """
     generator_kwargs = dict(
         df=mock_raw_data(periods=10, ids=[0]),
         batch_size=2,
         sequence_length=2,
         forecast_steps_min=1,
         forecast_steps_max=1,
         id_column=id_column,
         sequence_columns=sequence_columns,
         sequence_prefix='seq_',
         last_step_columns=last_step_columns,
         last_step_prefix='end_of_',
     )
     generator = SequenceForecastBatchGenerator(**generator_kwargs)
     first_batch = generator[0]

     top_level_names = set()
     for label in first_batch.columns:
         if isinstance(label, tuple):
             top_level_names.add(label[0])
         else:
             top_level_names.add(label)

     assert top_level_names == set(expected_columns)
예제 #6
0
def df():
    """Return mock raw data generated with a 2017-01-01 01:00 start date."""
    first_timestamp = datetime.datetime(year=2017, month=1, day=1,
                                        hour=1, minute=0)
    return mock_raw_data(start_date=first_timestamp)
예제 #7
0
def input_df():
    """Return mock raw data for ids 0 and 1 with a random ``group`` column.

    The ``group`` column holds one random integer drawn from {0, 1} per row.
    """
    frame = mock.mock_raw_data(ids=[0, 1])
    n_rows = len(frame)
    random_groups = np.random.randint(2, size=n_rows)
    frame['group'] = random_groups
    return frame
예제 #8
0
def test_raises_when_missing_key(input_df):
    """Check that ``transform`` raises ``KeyError`` with default settings.

    The pipeline is built without an ``errors`` argument, fitted on
    ``input_df``, and then asked to transform a frame built with
    ``ids=[0, 1, 2]`` (presumably id 2 is absent from the fitted data --
    confirm against the ``input_df`` fixture).

    Args:
        input_df: Frame used to fit the grouped pipeline.
    """
    transform_df = mock.mock_raw_data(ids=[0, 1, 2])
    gp = GroupedPipeline(groupby=['id'], pipeline=Pipeline([col_selector]))
    gp.fit(input_df)
    with pytest.raises(KeyError):
        gp.transform(transform_df)
        # ``pytest.raises(message=...)`` was deprecated in pytest 4.1 and
        # removed in 5.0.  Calling ``pytest.fail`` after the statement under
        # test is the documented replacement: it is only reached when no
        # KeyError was raised and reports the same failure text.
        pytest.fail("Missing key 2 in fitted pipelines")