예제 #1
0
    def test__transform_single_categorical(self):
        """Check ``_transform`` when only one category is known.

        With the transformer configured for the categorical branch
        and a single dummy, every row of the input should be encoded
        as a one-column one-hot array.

        Input:
        - Series where every value is the same category
        Output:
        - single-column one-hot encoding of the input
        """
        # Setup
        series = pd.Series(['a', 'a', 'a'])
        transformer = OneHotEncodingTransformer()
        transformer.dummy_encoded = True
        transformer.num_dummies = 1
        transformer.indexer = [0]
        transformer.dummies = ['a']

        # Run
        result = transformer._transform(series)

        # Assert
        # Three rows, one column, all set: equivalent to [[1], [1], [1]].
        np.testing.assert_array_equal(result, np.ones((3, 1), dtype=int))
예제 #2
0
    def test__transform_zeros_categorical(self):
        """Test the ``_transform`` with an unknown category.

        The transformer is configured for the categorical branch
        with ``'a'`` as its only dummy. Values passed to
        ``_transform`` that do not match that dummy should be
        returned as a single column of zeros.

        Input:
        - Series containing only a value that is not in ``dummies``
        Output:
        - one-hot encoding of the input, all zeros
        """
        # Setup
        ohet = OneHotEncodingTransformer()
        ohet.dummies = ['a']
        ohet.indexer = [0]
        ohet.num_dummies = 1
        ohet.dummy_encoded = True
        # 'b' is deliberately absent from ``ohet.dummies``.
        data = pd.Series(['b', 'b', 'b'])

        # Run
        out = ohet._transform(data)

        # Assert
        expected = np.array([[0], [0], [0]])
        np.testing.assert_array_equal(out, expected)
예제 #3
0
    def test__transform_nans_categorical(self):
        """Check ``_transform`` on categorical data that contains nulls.

        With the transformer configured for the categorical branch
        and ``dummy_na`` enabled, each known category should get its
        own column, and both ``np.nan`` and ``None`` should share one
        encoding in the extra last column.

        Input:
        - Series with categorical values containing nans
        Output:
        - one-hot encoding of the input
        """
        # Setup
        series = pd.Series([np.nan, None, 'a', 'b'])
        transformer = OneHotEncodingTransformer()
        transformer.dummy_encoded = True
        transformer.dummy_na = True
        transformer.num_dummies = 2
        transformer.indexer = [0, 1]
        transformer.dummies = ['a', 'b']

        # Run
        result = transformer._transform(series)

        # Assert
        expected_encoding = np.array([
            [0, 0, 1],
            [0, 0, 1],
            [1, 0, 0],
            [0, 1, 0],
        ])
        np.testing.assert_array_equal(result, expected_encoding)