Esempio n. 1
0
def test_column_transformer_fit_transform_should_support_multiple_tuples():
    """Fit-transform a ColumnTransformer built from two (columns, step) tuples.

    One tuple selects columns with ``slice(0, 2)`` and the other selects the
    single column index ``2``; each is paired with its own ``MultiplyBy2``.
    The test checks the processed outputs and then, for each sub-step, the
    ``fitted_data`` recorded by the wrapped ``MultiplyBy2``.
    """
    # Given
    raw_inputs = np.array([[1, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23]])
    raw_targets = np.array([[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23]])
    doubled_columns = np.array([[2, 2, 4], [20, 22, 24], [40, 42, 44]])
    step_tuples = [(slice(0, 2), MultiplyBy2()), (2, MultiplyBy2())]
    test_case = ColumnChooserTestCase(
        data_inputs=raw_inputs,
        expected_outputs=raw_targets,
        expected_processed_outputs=doubled_columns,
        column_transformer_tuple_list=step_tuples,
        n_dimension=3,
    )
    data_inputs = test_case.data_inputs
    transformer = ColumnTransformer(test_case.column_transformer_tuple_list)

    # When
    fitted_transformer, outputs = transformer.fit_transform(
        data_inputs, test_case.expected_outputs)

    # Then
    assert np.array_equal(test_case.expected_processed_outputs, outputs)

    # Sub-steps are looked up by the key "<column selection>_MultiplyBy2";
    # the single-index step is checked first, then the slice step.
    expected_fit_calls_by_key = (
        ('2_MultiplyBy2',
         [([[2], [12], [22]],
           [[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23]])]),
        ('slice(0, 2, None)_MultiplyBy2',
         [([[1, 1], [10, 11], [20, 21]],
           [[0, 1, 2, 3], [10, 11, 12, 13], [20, 21, 22, 23]])]),
    )
    for step_key, expected_fitted_data in expected_fit_calls_by_key:
        wrapped_step = fitted_transformer[step_key]['MultiplyBy2']
        assert_data_fitted_properly(wrapped_step.fitted_data,
                                    expected_fitted_data)
Esempio n. 2
0
def test_column_transformer_fit_transform_should_support_indexes(test_case: ColumnChooserTestCase):
    """Fit-transform a ColumnTransformer described by ``test_case`` and verify it.

    The transformer is built from the case's tuple list and ``n_dimension``,
    fitted on the case's inputs and expected outputs, and then checked for
    both the processed outputs and the ``fitted_data`` of the ``MultiplyBy2``
    found under ``test_case.expected_step_key``.
    """
    transformer = ColumnTransformer(
        test_case.column_transformer_tuple_list,
        test_case.n_dimension,
    )

    fitted_transformer, processed = transformer.fit_transform(
        test_case.data_inputs,
        test_case.expected_outputs,
    )

    assert np.array_equal(processed, test_case.expected_processed_outputs)

    wrapped_step = fitted_transformer[test_case.expected_step_key]['MultiplyBy2']
    assert_data_fitted_properly(wrapped_step.fitted_data,
                                test_case.expected_fitted_data)
def _apply_different_encoders_to_columns():
    """
    Encode column 0 ("pets") with its own LabelEncoder and columns 1 and 2
    ("owner" and "location") with a second LabelEncoder shared between them,
    then assert the encoded values.

    Reads the module-level ``df`` and feeds ``df.values`` to the transformer.
    """
    # Standalone encoder for column 0, named "pets".
    pets_step = (0, FlattenForEach(LabelEncoder(), then_unflatten=True))
    # One encoder shared by columns 1 and 2, "owner" and "location".
    owner_location_step = (
        [1, 2],
        FlattenForEach(LabelEncoder(), then_unflatten=True),
    )
    transformer = ColumnTransformer(
        [pets_step, owner_location_step],
        n_dimension=2,
    )

    _, encoded = transformer.fit_transform(df.values)

    # Written one list per output column, then transposed so each list
    # becomes a column of the expected array.
    expected_by_column = [
        [0, 1, 0, 2, 1, 1],
        [1, 3, 0, 1, 5, 3],
        [4, 2, 2, 4, 4, 2],
    ]
    expected = np.array(expected_by_column).T
    assert np.array_equal(encoded, expected)