Exemple #1
0
    def test__transform_unknown_nan(self):
        """Test the ``_transform`` with unknown and nans.

        This is an edge case for ``_transform`` where
        unknowns should be zeros and nans should be
        the last entry in the column.

        Input:
        - Series with unknown and nans
        Output:
        - one-hot encoding of the input
        """
        # Setup
        ohet = OneHotEncodingTransformer()
        pd.Series(['a'])
        ohet.dummies = ['a']
        ohet.dummy_na = True
        ohet.num_dummies = 1

        # Run
        out = ohet._transform(pd.Series(['b', 'b', np.nan]))

        # Assert
        expected = np.array([[0, 0], [0, 0], [0, 1]])
        np.testing.assert_array_equal(out, expected)
Exemple #2
0
    def test__transform_zeros_categorical(self):
        """Test the ``_transform`` with unknown category.

        The values passed to ``_transform`` should be
        returned in a one-hot encoding representation
        using the categorical branch where it should
        be a column of zeros.

        Input:
        - Series with categorical and unknown values
        Output:
        - one-hot encoding of the input
        """
        # Setup
        ohet = OneHotEncodingTransformer()
        pd.Series(['a'])
        ohet.dummies = ['a']
        ohet.indexer = [0]
        ohet.num_dummies = 1
        ohet.dummy_encoded = True

        # Run
        out = ohet._transform(pd.Series(['b', 'b', 'b']))

        # Assert
        expected = np.array([[0], [0], [0]])
        np.testing.assert_array_equal(out, expected)
Exemple #3
0
    def test__transform_nans_categorical(self):
        """Test the ``_transform`` method with nans.

        The values passed to ``_transform`` should be
        returned in a one-hot encoding representation using
        the categorical branch. Null values should be
        represented by the same encoding.

        Input:
        - Series with categorical values containing nans
        Output:
        - one-hot encoding of the input
        """
        # Setup
        ohet = OneHotEncodingTransformer()
        data = pd.Series([np.nan, None, 'a', 'b'])
        ohet.dummies = ['a', 'b']
        ohet.indexer = [0, 1]
        ohet.dummy_na = True
        ohet.num_dummies = 2
        ohet.dummy_encoded = True

        # Run
        out = ohet._transform(data)

        # Assert
        expected = np.array([[0, 0, 1], [0, 0, 1], [1, 0, 0], [0, 1, 0]])
        np.testing.assert_array_equal(out, expected)
Exemple #4
0
    def test__transform_single_categorical(self):
        """Test the ``_transform`` with one category.

        The values passed to ``_transform`` should be
        returned in a one-hot encoding representation
        using the categorical branch where it should
        be a single column.

        Input:
        - Series with a single category
        Output:
        - one-hot encoding of the input
        """
        # Setup
        ohet = OneHotEncodingTransformer()
        data = pd.Series(['a', 'a', 'a'])
        ohet.dummies = ['a']
        ohet.indexer = [0]
        ohet.num_dummies = 1
        ohet.dummy_encoded = True

        # Run
        out = ohet._transform(data)

        # Assert
        expected = np.array([[1], [1], [1]])
        np.testing.assert_array_equal(out, expected)
Exemple #5
0
    def test__transform_no_nan(self):
        """Test the ``_transform`` method without nans.

        The values passed to ``_transform`` should be
        returned in a one-hot encoding representation.

        Input:
        - Series with values
        Output:
        - one-hot encoding of the input
        """
        # Setup
        ohet = OneHotEncodingTransformer()
        data = pd.Series(['a', 'b', 'c'])
        ohet.dummies = ['a', 'b', 'c']
        ohet.num_dummies = 3

        # Run
        out = ohet._transform(data)

        # Assert
        expected = np.array([[1, 0, 0], [0, 1, 0], [0, 0, 1]])
        np.testing.assert_array_equal(out, expected)