コード例 #1
0
    def test_fit(self):
        """fit builds a probability map matching the value frequencies.

        Every unique value of the fitted column must be present in
        ``probability_map``. Each entry is read as ``(interval, mean, ...)``:
        the interval length has to equal the relative frequency of the value
        and the mean has to be the midpoint of the interval.
        """

        # Setup
        col_meta = {"name": "breakfast", "type": "categorical"}
        data = pd.DataFrame({'breakfast': ['A', 'B', 'A', 'B', 'B']})
        transformer = CatTransformer()

        frequency = {  # The frequency of the values in data
            'A': 0.4,
            'B': 0.6
        }

        # Floats are compared up to this tolerance instead of exactly,
        # because of floating point issues.
        tolerance = 1 / 1E9

        # Run
        transformer.fit(data, col_meta)

        # Check
        # Keys are unique values of initial data
        assert set(transformer.probability_map) == set(
            data['breakfast'].unique())

        for key, values in transformer.probability_map.items():
            with self.subTest(key=key):
                interval = values[0]
                mean = values[1]
                lower, upper = interval[0], interval[1]

                # Length of interval is frequency
                assert abs((upper - lower) - frequency[key]) < tolerance

                # Mean is middle point: its distance to both bounds is equal.
                # abs() is required, otherwise a mean lying below the middle
                # point yields a negative difference and passes unnoticed.
                assert abs((mean - lower) - (upper - mean)) < tolerance
コード例 #2
0
    def test_fit_transform_val_nan(self):
        """fit_transform registers a nan entry of the data in the probability map.

        The fitted column contains one ``np.nan`` value; afterwards ``None``
        is expected to be found in ``probability_map``.
        """

        # Setup
        col_meta = {"name": "breakfast", "type": "categorical"}
        nan_data = pd.DataFrame({'breakfast': [np.nan, 1, 5]})
        cat_transformer = CatTransformer()

        # Run
        cat_transformer.fit_transform(nan_data, col_meta)

        # Check
        # The nan value of the input is looked up as None in the map
        fitted_map = cat_transformer.probability_map
        assert None in fitted_map
コード例 #3
0
    def test_fit_transform_missing(self):
        """fit_transform called with ``missing=True`` returns data equal to its input.

        Note that the column used here does not hold any null value.
        """

        # Setup
        col_meta = {"name": "breakfast", "type": "categorical"}
        original_column = pd.Series(['B', 'B', 'A', 'B', 'A'])
        transformer = CatTransformer()

        # Run
        result = transformer.fit_transform(
            original_column, col_meta, missing=True)

        # Check
        # The returned data has to be equal to the column passed in
        is_unchanged = original_column.equals(result)
        assert is_unchanged
コード例 #4
0
    def test_fit_transform(self):
        """fit_transform returns the column with categories replaced by numbers."""

        # Setup
        col_meta = {"name": "breakfast", "type": "categorical"}
        categories = pd.Series(['B', 'B', 'A', 'B', 'A'])
        cat_transformer = CatTransformer()

        # Every 'B' is expected as 0.7 and every 'A' as 0.2
        expected_values = [0.7, 0.7, 0.2, 0.7, 0.2]
        expected_result = pd.DataFrame({'breakfast': expected_values})

        # Run
        result = cat_transformer.fit_transform(categories, col_meta, False)

        # Check
        matches_expected = result.equals(expected_result)
        assert matches_expected
コード例 #5
0
    def test_get_category(self):
        """get_category recovers the original categories from transformed values."""

        # Setup
        col_meta = {"name": "breakfast", "type": "categorical"}
        categories = ['B', 'B', 'A', 'B', 'A']
        original_column = pd.DataFrame({'breakfast': categories})
        cat_transformer = CatTransformer()

        # The numerical input for get_category comes from fit_transform itself
        transformed_data = cat_transformer.fit_transform(
            original_column, col_meta, False)
        numerical_column = transformed_data['breakfast']

        # Run
        result = cat_transformer.get_category(numerical_column)

        # Check
        # Every recovered category has to match the original one
        comparison = result == original_column['breakfast']
        assert comparison.all()
コード例 #6
0
    def test_get_val(self, rvs_mock):
        """get_val returns the value provided by the mocked random sampler.

        NOTE(review): ``rvs_mock`` is presumably injected by a patch decorator
        that is not part of this block - confirm which callable it replaces.
        """

        # Setup
        mocked_value = 1
        rvs_mock.return_value = mocked_value

        cat_transformer = CatTransformer()
        cat_transformer.probability_map = {
            'A': ((0.6, 1.0), 0.8, 0.0666),
            'B': ((0, 0.6), 0.3, 0.0999)
        }

        # Run
        result = cat_transformer.get_val('B')

        # Check
        assert result == mocked_value
コード例 #7
0
    def test___init__(self):
        """A new CatTransformer has the categorical type and an empty probability map."""

        # Setup
        expected_type = 'categorical'
        expected_map = {}

        # Run
        instance = CatTransformer()

        # Check
        assert instance.type == expected_type
        assert instance.probability_map == expected_map
コード例 #8
0
    def test_reverse_transform(self):
        """reverse_transform maps numerical values back to their categories.

        The probability map is set by hand: values within (0, 0.6) are
        expected to come back as 'B' and values within (0.6, 1.0) as 'A'.
        """

        # Setup
        cat_transformer = CatTransformer(
            col_meta={"name": "breakfast", "type": "categorical"},
            missing=False
        )
        cat_transformer.probability_map = {
            'A': ((0.6, 1.0), 0.8, 0.0666),
            'B': ((0, 0.6), 0.3, 0.0999)
        }
        cat_transformer.col_name = 'breakfast'

        numerical_values = [0.1, 0.4, 0.8, 0.3, 0.7]
        expected_categories = ['B', 'B', 'A', 'B', 'A']
        col = pd.DataFrame({'breakfast': numerical_values})
        expected_result = pd.DataFrame({'breakfast': expected_categories})

        # Run
        result = cat_transformer.reverse_transform(col)

        # Check
        matches_expected = result.equals(expected_result)
        assert matches_expected