Python OneHotEncodingTransformer.fit Examples

Programming Language: Python

Namespace/Package Name: rdt.transformers

Method/Function: fit

Examples at hotexamples.com: 13

Python OneHotEncodingTransformer.fit - 13 examples found. These are the top rated real world Python examples of rdt.transformers.OneHotEncodingTransformer.fit extracted from open source projects. You can rate examples to help us improve the quality of examples.

Frequently Used Methods

Show Hide

OneHotEncodingTransformer(24)

fit(13)

_transform(5)

dummies(5)

num_dummies(5)

transform(5)

_prepare_data(3)

dummy_encoded(3)

indexer(3)

dummy_na(2)

reverse_transform(2)

Example #1

Show file

    def test_transform_numeric(self):
        """Check ``transform`` on a numeric series.

        The transformer is fitted on the two integers ``1`` and ``2`` and
        then asked to transform the same series. Each row of the result
        must be the one-hot vector of the corresponding value.

        Input:
        - Series holding the integers 1 and 2
        Output:
        - 2x2 array with ones on the diagonal
        Side effects:
        - ``dummy_encoded`` is expected to be falsy after fitting
        """
        # Setup
        numeric_values = pd.Series([1, 2])
        one_hot_rows = np.array([[1, 0], [0, 1]])
        transformer = OneHotEncodingTransformer()
        transformer.fit(numeric_values)

        # Run
        encoded = transformer.transform(numeric_values)

        # Assert
        assert not transformer.dummy_encoded
        np.testing.assert_array_equal(encoded, one_hot_rows)

Example #2

Show file

    def test_fit_single(self):
        """Check ``fit`` on a series that repeats one category.

        Input:
        - Series containing the value ``'a'`` three times
        Side effects:
        - ``dummies`` must hold exactly one entry, ``'a'``
        """
        # Setup
        transformer = OneHotEncodingTransformer()
        repeated_category = pd.Series(['a', 'a', 'a'])

        # Run
        transformer.fit(repeated_category)

        # Assert
        np.testing.assert_array_equal(transformer.dummies, ['a'])

Example #3

Show file

    def _fit_discrete(self, column_name, raw_column_data):
        """Fit a one-hot encoder on a discrete column and describe it.

        Args:
            column_name:
                Name recorded in the returned ``ColumnTransformInfo``.
            raw_column_data:
                Column values handed to the encoder's ``fit``.

        Returns:
            ColumnTransformInfo:
                Carries the fitted encoder, a single softmax ``SpanInfo``
                and an output dimension, both sized by the number of
                entries in the encoder's ``dummies``.
        """
        encoder = OneHotEncodingTransformer()
        encoder.fit(raw_column_data)

        # The width of the encoded output is taken from the fitted dummies.
        width = len(encoder.dummies)
        spans = [SpanInfo(width, 'softmax')]

        return ColumnTransformInfo(
            column_name=column_name,
            column_type="discrete",
            transform=encoder,
            transform_aux=None,
            output_info=spans,
            output_dimensions=width,
        )

Example #4

Show file

File: test_categorical.py Project: sbrugman/RDT

def test_one_hot_numerical_nans():
    """Round-trip a numeric column with NaNs through the transformer.

    The series mixes two integers with two NaN values (one built with
    ``float('nan')``, one with ``np.nan``). After fit, transform and
    reverse_transform the result must equal the original series.
    """
    original = pd.Series([1, 2, float('nan'), np.nan])

    encoder = OneHotEncodingTransformer()
    encoder.fit(original)

    # Encode and immediately decode; the round trip must be lossless.
    recovered = encoder.reverse_transform(encoder.transform(original))

    pd.testing.assert_series_equal(recovered, original)

Example #5

Show file

    def _fit_discrete(self, column, data):
        """Fit a one-hot encoder on the first column of ``data``.

        Args:
            column:
                Value stored under ``"name"`` in the returned dict.
            data:
                Two-dimensionally indexable object; only ``data[:, 0]``
                is used for fitting.

        Returns:
            dict:
                Keys ``"name"``, ``"encoder"``, ``"output_info"`` and
                ``"output_dimensions"``; the last two are sized by the
                number of entries in the encoder's ``dummies``.
        """
        encoder = OneHotEncodingTransformer()
        first_column = data[:, 0]
        encoder.fit(first_column)

        width = len(encoder.dummies)
        softmax_span = (width, "softmax")

        return {
            "name": column,
            "encoder": encoder,
            "output_info": [softmax_span],
            "output_dimensions": width,
        }

Example #6

Show file

File: transformer.py Project: wps1215/CTGAN

    def _fit_discrete(self, column, data):
        """Fit a one-hot encoder on the first column of ``data``.

        Args:
            column:
                Value stored under ``'name'`` in the returned dict.
            data:
                Two-dimensionally indexable object; only ``data[:, 0]``
                is used.

        Returns:
            dict:
                Keys ``'name'``, ``'encoder'``, ``'output_info'`` and
                ``'output_dimensions'``; the last two are sized by the
                number of distinct values in the sliced column.
        """
        encoder = OneHotEncodingTransformer()
        column_values = data[:, 0]
        encoder.fit(column_values)

        # NOTE(review): the category count comes from the raw values, not
        # from the fitted encoder -- confirm both agree for NaN inputs.
        distinct_values = set(column_values)
        width = len(distinct_values)

        return {
            'name': column,
            'encoder': encoder,
            'output_info': [(width, 'softmax')],
            'output_dimensions': width
        }

Example #7

Show file

    def test_reverse_transform_no_nans(self):
        """Check ``reverse_transform`` when the fitted data has no nans.

        Input:
        - 3x3 one-hot matrix with ones on the diagonal
        Output:
        - Series ``['a', 'b', 'c']``, matching the fitted categories
        """
        # Setup
        transformer = OneHotEncodingTransformer()
        transformer.fit(pd.Series(['a', 'b', 'c']))
        one_hot = np.array([
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
        ])

        # Run
        decoded = transformer.reverse_transform(one_hot)

        # Assert
        pd.testing.assert_series_equal(decoded, pd.Series(['a', 'b', 'c']))

Example #8

Show file

    def test_transform_unknown(self):
        """Check ``transform`` on a category that was not seen in ``fit``.

        The transformer is fitted on ``'a'`` only; transforming ``'b'``
        is expected to raise a ``ValueError``.

        Input:
        - List holding one category absent from the fitted data
        """
        # Setup
        transformer = OneHotEncodingTransformer()
        transformer.fit(pd.Series(['a']))

        # Assert
        np.testing.assert_raises(ValueError, transformer.transform, ['b'])

Example #9

Show file

    def test_transform_single(self):
        """Check ``transform`` when only one category exists.

        With a single fitted category every row must encode to ``[1]``.

        Input:
        - Series containing ``'a'`` three times
        Output:
        - 3x1 array filled with ones
        """
        # Setup
        repeated_category = pd.Series(['a', 'a', 'a'])
        column_of_ones = np.array([[1], [1], [1]])
        transformer = OneHotEncodingTransformer()
        transformer.fit(repeated_category)

        # Run
        encoded = transformer.transform(repeated_category)

        # Assert
        np.testing.assert_array_equal(encoded, column_of_ones)

Example #10

Show file

    def test_transform_nans(self):
        """Check ``transform`` on categorical data that includes a null.

        The input holds two categories and one ``None``; each of the
        three rows must map to its own one-hot column.

        Input:
        - Series ``['a', 'b', None]``
        Output:
        - 3x3 array with ones on the diagonal
        """
        # Setup
        values_with_null = pd.Series(['a', 'b', None])
        transformer = OneHotEncodingTransformer()
        transformer.fit(values_with_null)

        # Run
        encoded = transformer.transform(values_with_null)

        # Assert
        identity = np.array([
            [1, 0, 0],
            [0, 1, 0],
            [0, 0, 1],
        ])
        np.testing.assert_array_equal(encoded, identity)

Example #11

Show file

File: data_transformer.py Project: sdv-dev/CTGAN

    def _fit_discrete(self, data):
        """Fit a one-hot encoder on the first column of a dataframe.

        Args:
            data (pd.DataFrame):
                A dataframe whose first column is the one to encode.

        Returns:
            namedtuple:
                A ``ColumnTransformInfo`` holding the column name, the
                fitted encoder, one softmax ``SpanInfo`` and the output
                dimension, both sized by the encoder's ``dummies``.
        """
        target_column = data.columns[0]

        encoder = OneHotEncodingTransformer()
        encoder.fit(data, [target_column])

        width = len(encoder.dummies)
        spans = [SpanInfo(width, 'softmax')]

        return ColumnTransformInfo(
            column_name=target_column,
            column_type='discrete',
            transform=encoder,
            output_info=spans,
            output_dimensions=width,
        )

Example #12

Show file

    def test_fit_nans_numeric(self):
        """Check ``fit`` on numeric data that contains a nan.

        After fitting, ``dummies`` must list only the numeric values,
        ``decoder`` must additionally end with the nan, ``dummy_encoded``
        must be falsy and ``dummy_na`` truthy.

        Input:
        - Series containing two integers and one nan value
        """
        # Setup
        transformer = OneHotEncodingTransformer()
        numeric_with_nan = pd.Series([1, 2, np.nan])

        # Run
        transformer.fit(numeric_with_nan)

        # Assert
        np.testing.assert_array_equal(transformer.dummies, [1, 2])
        np.testing.assert_array_equal(transformer.decoder, [1, 2, np.nan])
        assert not transformer.dummy_encoded
        assert transformer.dummy_na

Example #13

Show file

    def test_fit_no_nans(self):
        """Check ``fit`` on string categories without nans.

        After fitting, both ``dummies`` and ``decoder`` must list the
        three categories, ``dummy_encoded`` must be truthy and
        ``dummy_na`` falsy.

        Input:
        - Series ``['a', 'b', 'c']``
        """
        # Setup
        transformer = OneHotEncodingTransformer()
        categories = ['a', 'b', 'c']

        # Run
        transformer.fit(pd.Series(categories))

        # Assert
        np.testing.assert_array_equal(transformer.dummies, ['a', 'b', 'c'])
        np.testing.assert_array_equal(transformer.decoder, ['a', 'b', 'c'])
        assert transformer.dummy_encoded
        assert not transformer.dummy_na