Example #1
def test_column_transformer_fit_transform_should_support_multiple_tuples():
    # Given
    test_case = ColumnChooserTestCase(
        data_inputs=np.array(
            [[1, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23]]),
        expected_outputs=np.array(
            [[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23]]),
        expected_processed_outputs=np.array(
            [[2, 2, 4], [20, 22, 24], [40, 42, 44]]),
        column_transformer_tuple_list=[
            (slice(0, 2), MultiplyBy2()),
            (2, MultiplyBy2())],
        n_dimension=3)
    data_inputs = test_case.data_inputs
    column_transformer = ColumnTransformer(
        test_case.column_transformer_tuple_list)

    # When
    column_transformer, outputs = column_transformer.fit_transform(
        data_inputs, test_case.expected_outputs)

    # Then
    assert np.array_equal(test_case.expected_processed_outputs, outputs)
    actual_fitted_data = column_transformer['2_MultiplyBy2'][
        'MultiplyBy2'].fitted_data
    expected_fitted_data = [(
        [[2], [12], [22]],
        [[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23]])]
    assert_data_fitted_properly(actual_fitted_data, expected_fitted_data)

    actual_fitted_data = column_transformer['slice(0, 2, None)_MultiplyBy2'][
        'MultiplyBy2'].fitted_data
    expected_fitted_data = [(
        [[1, 1], [10, 11], [20, 21]],
        [[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23]])]
    assert_data_fitted_properly(actual_fitted_data, expected_fitted_data)
Example #2
def test_column_transformer_transform_should_support_multiple_tuples():
    # Given
    test_case = ColumnChooserTestCase(
        data_inputs=np.array([
            [[1, 1, 2, 3]],
            [[10, 11, 12, 13]],
            [[20, 21, 22, 23]]
        ]),
        expected_outputs=np.array([
            [[0, 1, 2, 3]],
            [[10, 11, 12, 13]],
            [[20, 21, 22, 23]]
        ]),
        expected_processed_outputs=np.array([
            [[2, 2, 4]],
            [[20, 22, 24]],
            [[40, 42, 44]]
        ]),
        column_transformer_tuple_list=[
            (slice(0, 2), MultiplyBy2()),
            (2, MultiplyBy2())
        ],
        n_dimension=3
    )
    data_inputs = test_case.data_inputs
    p = ColumnTransformer(test_case.column_transformer_tuple_list, test_case.n_dimension)

    # When
    outputs = p.transform(data_inputs)

    # Then
    assert np.array_equal(test_case.expected_processed_outputs, outputs)
Example #3
def test_column_transformer_transform_should_support_indexes(test_case: ColumnChooserTestCase):
    data_inputs = test_case.data_inputs
    column_transformer = ColumnTransformer(test_case.column_transformer_tuple_list, test_case.n_dimension)

    outputs = column_transformer.transform(data_inputs)

    assert np.array_equal(outputs, test_case.expected_processed_outputs)
Example #4
def test_column_transformer_fit_should_support_indexes(test_case: ColumnChooserTestCase):
    data_inputs = test_case.data_inputs
    p = ColumnTransformer(test_case.column_transformer_tuple_list, test_case.n_dimension)

    p = p.fit(data_inputs, test_case.expected_outputs)

    actual_fitted_data = p[test_case.expected_step_key]['MultiplyBy2'].fitted_data
    expected_fitted_data = test_case.expected_fitted_data
    assert_data_fitted_properly(actual_fitted_data, expected_fitted_data)
Example #5
def __init__(self,
             input_columns: ColumnChooserTupleList,
             output_columns: ColumnChooserTupleList,
             n_dimension: int = 3):
    super().__init__([
        OutputTransformerWrapper(
            ColumnTransformer(output_columns, n_dimension),
            from_data_inputs=True),
        ColumnTransformer(input_columns, n_dimension),
    ])
Example #6
def test_column_transformer_fit_transform_should_support_indexes(
        test_case: ColumnChooserTestCase):
    data_inputs = test_case.data_inputs
    expected_outputs = test_case.expected_outputs
    column_transformer = ColumnTransformer(
        test_case.column_transformer_tuple_list)

    column_transformer, outputs = column_transformer.fit_transform(
        data_inputs, expected_outputs)

    assert np.array_equal(outputs, test_case.expected_processed_outputs)
    actual_fitted_data = column_transformer[
        test_case.expected_step_key]['MultiplyBy2'].fitted_data
    expected_fitted_data = test_case.expected_fitted_data
    assert_data_fitted_properly(actual_fitted_data, expected_fitted_data)
Example #7
def _apply_different_encoders_to_columns():
    """
    One standalone LabelEncoder will be applied on the pets,
    and another one will be shared for the columns owner and location.
    """
    p = ColumnTransformer(
        [
            # A different encoder will be used for column 0 with name "pets":
            (0, FlattenForEach(LabelEncoder(), then_unflatten=True)),
            # A shared encoder will be used for column 1 and 2, "owner" and "location":
            ([1, 2], FlattenForEach(LabelEncoder(), then_unflatten=True)),
        ],
        n_dimension=2)

    p, predicted_output = p.fit_transform(df.values)

    expected_output = np.array([[0, 1, 0, 2, 1, 1], [1, 3, 0, 1, 5, 3],
                                [4, 2, 2, 4, 4, 2]]).transpose()
    assert np.array_equal(predicted_output, expected_output)
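
A minimal standalone sketch of the same idea, using plain scikit-learn and NumPy
on made-up data (not the df used above): column 0 gets its own LabelEncoder,
while columns 1 and 2 are encoded by one shared encoder fitted on their
flattened values.

import numpy as np
from sklearn.preprocessing import LabelEncoder

# Illustrative data only: the columns stand in for "pets", "owner" and "location".
data = np.array([
    ["cat", "alice", "paris"],
    ["dog", "bob", "lyon"],
    ["cat", "alice", "lyon"],
])

# A standalone encoder for column 0.
pets_encoded = LabelEncoder().fit_transform(data[:, 0])

# A shared encoder for columns 1 and 2: it is fitted on both columns flattened
# together, so "owner" and "location" share one label vocabulary.
shared_encoder = LabelEncoder().fit(data[:, 1:3].ravel())
owner_encoded = shared_encoder.transform(data[:, 1])
location_encoded = shared_encoder.transform(data[:, 2])

encoded = np.column_stack([pets_encoded, owner_encoded, location_encoded])
print(encoded)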
Example #8
]
non_categorical_columns = [
    i for i in X.columns if i not in categorical_columns
]
categories = [
    ["clear", "misty", "rain"],
    ["spring", "summer", "fall", "winter"],
    ["False", "True"],
    ["False", "True"],
]
ordinal_encoder = OrdinalEncoder(categories=categories)

gbrt_pipeline = Pipeline([
    ColumnTransformer([
        (categorical_columns, ordinal_encoder),
        (non_categorical_columns, Identity()),
    ], n_dimension=2),
    HistGradientBoostingRegressor(categorical_features=range(4)),
])

# %%
#
# Let's evaluate our gradient boosting model with the mean absolute error of the
# relative demand, averaged across our 5 time-based cross-validation splits:
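
# %%
# As a rough illustration of the idea above, a plain scikit-learn sketch using
# TimeSeriesSplit and negated-MAE scoring could look like this (an illustrative
# assumption only, not the evaluate helper defined below, which additionally
# wraps the Neuraxle pipeline for sklearn compatibility):

from sklearn.model_selection import TimeSeriesSplit, cross_val_score


def sketch_evaluate_mae(model, X, y, n_splits=5):
    # Time-ordered splits: each fold trains on the past and validates on the future.
    ts_cv = TimeSeriesSplit(n_splits=n_splits)
    # cross_val_score negates MAE (scorers are "greater is better"), so flip the sign back.
    mae_per_split = -cross_val_score(
        model, X, y, cv=ts_cv, scoring="neg_mean_absolute_error")
    print(f"Mean Absolute Error: "
          f"{mae_per_split.mean():.3f} +/- {mae_per_split.std():.3f}")
    return mae_per_split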


def evaluate(model, X, y, cv):
    class SilentMetaStep(MetaStep):
        """This class is needed here to disable the sklearn compatibility errors with the getters and setters."""
        def __init__(self):