Beispiel #1
0
    def test__transform_unknown_nan(self):
        """Test ``_transform`` with an unknown category and a nan.

        This is an edge case for ``_transform``: a value that is not
        listed in ``dummies`` should be encoded as a row of zeros, and
        a nan should set only the last column of its row.

        Setup:
        - Transformer configured by hand with ``dummies = ['a']``,
          ``dummy_na = True`` and ``num_dummies = 1``.
        Input:
        - Series with the unknown value ``'b'`` twice, followed by a nan
        Output:
        - one-hot encoding of the input with two columns
        """
        # Setup
        ohet = OneHotEncodingTransformer()
        ohet.dummies = ['a']
        ohet.dummy_na = True
        ohet.num_dummies = 1

        # Run
        out = ohet._transform(pd.Series(['b', 'b', np.nan]))

        # Assert
        # 'b' is not a known dummy, so its rows are all zeros; the nan row
        # has a one only in the trailing column.
        expected = np.array([[0, 0], [0, 0], [0, 1]])
        np.testing.assert_array_equal(out, expected)
Beispiel #2
0
    def test__transform_nans_categorical(self):
        """Check ``_transform`` on categorical data that contains nulls.

        The transformer is configured by hand with two dummies
        (``'a'`` and ``'b'``), ``dummy_na = True`` and
        ``dummy_encoded = True`` (intended to exercise the categorical
        branch of ``_transform``). Both kinds of null in the input,
        ``np.nan`` and ``None``, are expected to share the same
        encoding: a one in the last column.

        Input:
        - Series with categorical values containing nans
        Output:
        - one-hot encoding of the input
        """
        # Setup
        transformer = OneHotEncodingTransformer()
        transformer.dummies = ['a', 'b']
        transformer.indexer = [0, 1]
        transformer.dummy_na = True
        transformer.num_dummies = 2
        transformer.dummy_encoded = True
        values = pd.Series([np.nan, None, 'a', 'b'])

        # Run
        encoded = transformer._transform(values)

        # Assert
        expected_rows = [
            [0, 0, 1],  # np.nan -> null column
            [0, 0, 1],  # None   -> null column
            [1, 0, 0],  # 'a'
            [0, 1, 0],  # 'b'
        ]
        np.testing.assert_array_equal(encoded, np.array(expected_rows))