Example #1
0
    def test_transform_numeric(self):
        """Test the ``transform`` on numeric input.

        In this test ``transform`` should return a matrix
        representing each item in the input as one-hot encodings.

        Input:
        - Series with numeric input
        Output:
        - one-hot encoding of the input
        """
        # Setup
        ohet = OneHotEncodingTransformer()
        data = pd.Series([1, 2])
        ohet.fit(data)

        expected = np.array([
            [1, 0],
            [0, 1],
        ])

        # Run
        out = ohet.transform(data)

        # Assert
        assert not ohet.dummy_encoded
        np.testing.assert_array_equal(out, expected)
Example #2
0
    def test_transform_unknown(self):
        """Test the ``transform`` with unknown data.

        In this test ``transform`` should raise an error
        due to the attempt of transforming data with previously
        unseen categories.

        Input:
        - Series with unknown categorical values
        """
        # Setup
        ohet = OneHotEncodingTransformer()
        data = pd.Series(['a'])
        ohet.fit(data)

        # Assert
        with np.testing.assert_raises(ValueError):
            ohet.transform(['b'])
Example #3
0
def test_one_hot_numerical_nans():
    """Ensure OneHotEncodingTransformer works on numerical + nan only columns."""

    data = pd.Series([1, 2, float('nan'), np.nan])

    transformer = OneHotEncodingTransformer()
    transformer.fit(data)
    transformed = transformer.transform(data)
    reverse = transformer.reverse_transform(transformed)

    pd.testing.assert_series_equal(reverse, data)
Example #4
0
    def test_transform_single(self):
        """Test the ``transform`` on a single category.

        In this test ``transform`` should return a column
        filled with ones.

        Input:
        - Series with a single categorical value
        Output:
        - one-hot encoding of the input
        """
        # Setup
        ohet = OneHotEncodingTransformer()
        data = pd.Series(['a', 'a', 'a'])
        ohet.fit(data)

        # Run
        out = ohet.transform(data)

        # Assert
        expected = np.array([[1], [1], [1]])
        np.testing.assert_array_equal(out, expected)
Example #5
0
    def test_transform_nans(self):
        """Test the ``transform`` with nans.

        In this test ``transform`` should return an identity matrix
        representing each item in the input as well as nans.

        Input:
        - Series with categorical values and nans
        Output:
        - one-hot encoding of the input
        """
        # Setup
        ohet = OneHotEncodingTransformer()
        data = pd.Series(['a', 'b', None])
        ohet.fit(data)

        # Run
        out = ohet.transform(data)

        # Assert
        expected = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        np.testing.assert_array_equal(out, expected)