def test_all_groups_missing_raises(input_df, errors):
    """Check that transforming only unseen groups raises ``KeyError``.

    The pipeline is fitted on ``input_df`` and then asked to transform data
    built for ids 2 and 3. ``input_df`` and ``errors`` are presumably
    provided by pytest fixtures / parametrization declared outside this
    block -- TODO confirm.
    """
    unseen_groups_df = mock.mock_raw_data(ids=[2, 3])
    gp = GroupedPipeline(
        groupby=['id'],
        pipeline=Pipeline([col_selector]),
        errors=errors,
    )
    gp.fit(input_df)

    # The ``message`` keyword of ``pytest.raises`` was deprecated in pytest 4.1
    # and removed in 5.0. It only ever set the failure text shown when nothing
    # was raised; calling ``pytest.fail`` inside the block reproduces that on
    # every pytest version.
    with pytest.raises(KeyError):
        gp.transform(unseen_groups_df)
        pytest.fail('All keys missing in fitted pipelines')
def test_nb_batches(self, n_points, batch_size, batch_aggregator, expected_nb_batches):
    """Check that ``len(RowBatchGenerator)`` equals the expected batch count.

    Mock data is built for two ids with ``n_points`` periods; the generator
    is configured with the target column only.
    """
    customer_ids = np.arange(2)
    frame = mock_raw_data(periods=n_points, ids=customer_ids)

    generator = RowBatchGenerator(
        df=frame,
        batch_size=batch_size,
        columns=[ini.Columns.target],
        batch_aggregator=batch_aggregator,
    )

    nb_batches = len(generator)
    assert nb_batches == expected_nb_batches
def test_one_group_missing_return_none(input_df):
    """Check the ``errors='return_empty'`` output when transforming ids 0, 1 and 2.

    The first 96 output rows must equal the target column of the transformed
    frame and every remaining row must be NaN.
    """
    # Presumably the number of rows belonging to the groups seen during fit
    # -- TODO confirm against the mock data defaults.
    n_known_rows = 96

    transform_df = mock.mock_raw_data(ids=[0, 1, 2])
    grouped = GroupedPipeline(
        groupby=['id'],
        pipeline=Pipeline([val_selector]),
        errors='return_empty',
    )
    grouped.fit(input_df)
    result = grouped.transform(transform_df)

    assert result.shape[1] == 1

    expected_head = transform_df[ini.Columns.target].values[:n_known_rows]
    np.testing.assert_array_equal(result[:n_known_rows, 0], expected_head)
    assert np.isnan(result[n_known_rows:]).all()
def test_one_groups_missing_return_df(input_df):
    """Check the ``errors='return_df'`` output when transforming ids 0, 1 and 2.

    The result must carry the original columns plus ``month``; ``month`` must
    be filled for ids 0 and 1, entirely null for id 2, and the original
    columns must come back unchanged.
    """
    transform_df = mock.mock_raw_data(ids=[0, 1, 2])
    dt_feat = PandasDateTimeFeaturizer(attributes='month')
    gp = GroupedPipeline(groupby=['id'], pipeline=dt_feat, errors='return_df')
    gp.fit(input_df)
    out = gp.transform(transform_df)

    # This comparison was previously a bare expression without ``assert``,
    # so the column check was never actually enforced.
    assert set(out.columns) == {
        'id', ini.Columns.datetime, ini.Columns.target, 'month'
    }

    assert out[out.id == 0].month.notnull().all()
    assert out[out.id == 1].month.notnull().all()
    assert out[out.id == 2].month.isnull().all()

    orig_cols = ['id', ini.Columns.datetime, ini.Columns.target]
    pd.testing.assert_frame_equal(out[orig_cols], transform_df)
def test_columns(self, id_column, sequence_columns, last_step_columns, expected_columns):
    """Check the column names of the first batch of a sequence generator.

    Tuple column labels are reduced to their first element before the
    comparison with ``expected_columns``.
    """
    frame = mock_raw_data(periods=10, ids=[0])
    generator = SequenceForecastBatchGenerator(
        df=frame,
        batch_size=2,
        sequence_length=2,
        forecast_steps_min=1,
        forecast_steps_max=1,
        id_column=id_column,
        sequence_columns=sequence_columns,
        sequence_prefix='seq_',
        last_step_columns=last_step_columns,
        last_step_prefix='end_of_',
    )

    first_batch = generator[0]

    seen_columns = set()
    for label in first_batch.columns:
        # Tuple labels contribute only their first element.
        if isinstance(label, tuple):
            seen_columns.add(label[0])
        else:
            seen_columns.add(label)

    assert seen_columns == set(expected_columns)
def df():
    """Return mock raw data whose start date is 2017-01-01 01:00."""
    first_timestamp = datetime.datetime(2017, 1, 1, 1, 0)
    return mock_raw_data(start_date=first_timestamp)
def input_df():
    """Return mock raw data for ids 0 and 1 with a random 0/1 ``group`` column."""
    frame = mock.mock_raw_data(ids=[0, 1])
    n_rows = len(frame)
    # One random integer in {0, 1} per row; the generator is not seeded here.
    frame['group'] = np.random.randint(2, size=n_rows)
    return frame
def test_raises_when_missing_key(input_df):
    """Check that transforming data with an extra group raises ``KeyError``.

    The pipeline is built without an explicit ``errors`` argument, fitted on
    ``input_df`` and then asked to transform data built for ids 0, 1 and 2.
    ``input_df`` is presumably a pytest fixture declared outside this block
    -- TODO confirm.
    """
    transform_df = mock.mock_raw_data(ids=[0, 1, 2])
    gp = GroupedPipeline(groupby=['id'], pipeline=Pipeline([col_selector]))
    gp.fit(input_df)

    # The ``message`` keyword of ``pytest.raises`` was deprecated in pytest 4.1
    # and removed in 5.0. It only ever set the failure text shown when nothing
    # was raised; calling ``pytest.fail`` inside the block reproduces that on
    # every pytest version.
    with pytest.raises(KeyError):
        gp.transform(transform_df)
        pytest.fail("Missing key 2 in fitted pipelines")