예제 #1
0
    def test_fit_min_max_auto(self):
        """Test ``fit`` with ``min_value`` and ``max_value`` set to ``'auto'``.

        If the min or max parameters are set to ``'auto'``,
        the ``fit`` method should learn them from the
        fitted data.

        Input:
        - Array of numbers and a null value
        Side Effect:
        - ``_min_value`` and ``_max_value`` are learned
        """
        # Setup
        data = np.array([-100, -5000, 0, None, 100, 4000])

        # Run
        # The builtin ``float`` is used instead of ``np.float``: the latter
        # was only an alias of the builtin and was removed in NumPy 1.24.
        transformer = NumericalTransformer(
            dtype=float,
            nan='nan',
            min_value='auto',
            max_value='auto',
        )
        transformer.fit(data)

        # Asserts
        assert transformer._min_value == -5000
        assert transformer._max_value == 4000
예제 #2
0
    def test_fit_min_max_none(self):
        """Test ``fit`` with ``min_value`` and ``max_value`` set to ``None``.

        If the min and max parameters are set to ``None``,
        the ``fit`` method should not set its ``_min_value`` or
        ``_max_value`` instance variables.

        Input:
        - Array of floats and a null value
        Side Effect:
        - ``_min_value`` and ``_max_value`` stay ``None``
        """
        # Setup
        data = np.array([1.5, None, 2.5])

        # Run
        # The builtin ``float`` is used instead of ``np.float``: the latter
        # was only an alias of the builtin and was removed in NumPy 1.24.
        transformer = NumericalTransformer(
            dtype=float,
            nan='nan',
            min_value=None,
            max_value=None,
        )
        transformer.fit(data)

        # Asserts
        assert transformer._min_value is None
        assert transformer._max_value is None
예제 #3
0
    def test_fit_rounding_auto_max_decimals(self):
        """Test ``fit`` with ``rounding='auto'`` and too many decimals.

        If the ``rounding`` parameter is set to ``'auto'``,
        ``fit`` should learn the ``_rounding_digits`` to be the max
        number of decimal places seen in the data. The max
        amount of decimals that floats can be accurately compared
        with is 15. If the input data has values with more than
        14 decimals, we will not be able to accurately learn the
        number of decimal places required, so we do not round.

        Input:
        - Array with a value that has 15 decimals
        Side Effect:
        - ``_rounding_digits`` is set to ``None``
        """
        # Setup
        data = np.array([0.000000000000001])

        # Run
        # The builtin ``float`` is used instead of ``np.float``: the latter
        # was only an alias of the builtin and was removed in NumPy 1.24.
        transformer = NumericalTransformer(
            dtype=float,
            nan='nan',
            rounding='auto',
        )
        transformer.fit(data)

        # Asserts
        assert transformer._rounding_digits is None
예제 #4
0
    def test_fit_rounding_auto_max_inf(self):
        """Test ``fit`` with ``rounding='auto'`` and infinite values.

        If the ``rounding`` parameter is set to ``'auto'``
        and the data contains infinite values, ``fit`` should
        learn ``_rounding_digits`` from the remaining finite values
        only, as if the infinite values had been filtered out.

        Input:
        - Array with ``np.inf`` as a value
        Side Effect:
        - ``_rounding_digits`` is set to the value learned from the
          finite data
        """
        # Setup
        # Every finite value is a multiple of 1000 but not all of them are
        # multiples of 10000, hence the expected -3 below.
        data = np.array([15000, 4000, 60000, np.inf])

        # Run
        # The builtin ``float`` is used instead of ``np.float``: the latter
        # was only an alias of the builtin and was removed in NumPy 1.24.
        transformer = NumericalTransformer(
            dtype=float,
            nan='nan',
            rounding='auto',
        )
        transformer.fit(data)

        # Asserts
        assert transformer._rounding_digits == -3
예제 #5
0
    def test_fit_rounding_auto_large_numbers(self):
        """Test ``fit`` with ``rounding='auto'`` and very large numbers.

        If the ``rounding`` parameter is set to ``'auto'``
        and the data is very large, ``fit`` should learn
        ``_rounding_digits`` to be the biggest number of 0s
        to round to that keeps the data the same.

        Input:
        - Array of 10 powers of ten with exponents drawn from 10 to 19
        Side Effect:
        - ``_rounding_digits`` is set to the negated minimum exponent seen
          in the data
        """
        # Setup
        # ``randint``'s upper bound is exclusive, so exponents are in [10, 19].
        exponents = [np.random.randint(10, 20) for _ in range(10)]
        big_numbers = [10**exponent for exponent in exponents]
        data = np.array(big_numbers)

        # Run
        # The builtin ``float`` is used instead of ``np.float``: the latter
        # was only an alias of the builtin and was removed in NumPy 1.24.
        transformer = NumericalTransformer(
            dtype=float,
            nan='nan',
            rounding='auto',
        )
        transformer.fit(data)

        # Asserts
        # The smallest power of ten has the fewest trailing zeros, so it
        # bounds how many digits can be rounded away without changing data.
        assert transformer._rounding_digits == -min(exponents)
예제 #6
0
    def test_fit_rounding_int(self):
        """Test ``fit`` with ``rounding`` set to an int.

        If the ``rounding`` parameter is set to an int, the ``fit``
        method should keep ``rounding`` unchanged and use that same
        value for ``_rounding_digits`` instead of learning it from
        the data.

        Input:
        - An array with floats rounded to one decimal and a None value
        Side Effect:
        - ``rounding`` and ``_rounding_digits`` are the provided int
        """
        # Setup
        data = np.array([1.5, None, 2.5])
        expected_digits = 3

        # Run
        # The builtin ``float`` is used instead of ``np.float``: the latter
        # was only an alias of the builtin and was removed in NumPy 1.24.
        transformer = NumericalTransformer(
            dtype=float,
            nan='nan',
            rounding=expected_digits,
        )
        transformer.fit(data)

        # Asserts
        assert transformer.rounding == expected_digits
        assert transformer._rounding_digits == expected_digits
예제 #7
0
    def test_fit(self):
        """Test that ``fit`` stores the fill value and the dtype.

        Input:
        - Array of floats and a None value
        Side Effect:
        - ``null_transformer.fill_value`` equals the ``nan`` argument
        - ``_dtype`` equals the ``dtype`` argument
        """
        # Setup
        data = np.array([1.5, None, 2.5])

        # Run
        # The builtin ``float`` is used instead of ``np.float``: the latter
        # was only an alias of the builtin and was removed in NumPy 1.24.
        transformer = NumericalTransformer(dtype=float, nan='nan')
        transformer.fit(data)

        # Asserts
        expect_fill_value = 'nan'
        expect_dtype = float

        assert transformer.null_transformer.fill_value == expect_fill_value
        assert transformer._dtype == expect_dtype
예제 #8
0
    def test_fit_rounding_auto_max_zero(self):
        """Test ``fit`` with ``rounding='auto'`` when the max value is 0.

        If the ``rounding`` parameter is set to ``'auto'``,
        and the max in the data is 0, ``fit`` should
        learn the ``_rounding_digits`` to be 0.

        Input:
        - Array with 0 as max value
        Side Effect:
        - ``_rounding_digits`` is set to 0
        """
        # Setup
        data = np.array([0, 0, 0])

        # Run
        # The builtin ``float`` is used instead of ``np.float``: the latter
        # was only an alias of the builtin and was removed in NumPy 1.24.
        transformer = NumericalTransformer(
            dtype=float,
            nan='nan',
            rounding='auto',
        )
        transformer.fit(data)

        # Asserts
        assert transformer._rounding_digits == 0
예제 #9
0
    def test_fit_rounding_auto(self):
        """Test ``fit`` with ``rounding`` set to ``'auto'``.

        If the ``rounding`` parameter is set to ``'auto'``,
        ``fit`` should learn the ``_rounding_digits`` to be the max
        number of decimal places seen in the data.

        Input:
        - Array of floats with up to 4 decimals
        Side Effect:
        - ``_rounding_digits`` is set to 4
        """
        # Setup
        # 5.1234 is the value with the most decimal places (4).
        data = np.array([1, 2.1, 3.12, 4.123, 5.1234, 6.123, 7.12, 8.1, 9])

        # Run
        # The builtin ``float`` is used instead of ``np.float``: the latter
        # was only an alias of the builtin and was removed in NumPy 1.24.
        transformer = NumericalTransformer(
            dtype=float,
            nan='nan',
            rounding='auto',
        )
        transformer.fit(data)

        # Asserts
        assert transformer._rounding_digits == 4
예제 #10
0
    def test_fit_min_max_int(self):
        """Test ``fit`` with ``min_value`` and ``max_value`` set to ints.

        If the min and max parameters are set to an int,
        the ``fit`` method should not change them.

        Input:
        - Array of floats and a null value
        Side Effect:
        - ``_min_value`` and ``_max_value`` remain unchanged
        """
        # Setup
        data = np.array([1.5, None, 2.5])

        # Run
        # The builtin ``float`` is used instead of ``np.float``: the latter
        # was only an alias of the builtin and was removed in NumPy 1.24.
        transformer = NumericalTransformer(
            dtype=float,
            nan='nan',
            min_value=1,
            max_value=10,
        )
        transformer.fit(data)

        # Asserts
        assert transformer._min_value == 1
        assert transformer._max_value == 10
예제 #11
0
    def test_fit_rounding_auto_max_negative(self):
        """Test ``fit`` with ``rounding='auto'`` when the max is negative.

        If the ``rounding`` parameter is set to ``'auto'``,
        and the max in the data is negative, the ``fit`` method
        should still learn a ``_rounding_digits`` value that leaves
        those negative values unchanged when rounding.

        Input:
        - Array with negative max value
        Side Effect:
        - ``_rounding_digits`` is set to -1
        """
        # Setup
        # Every value is a multiple of 10 but not all are multiples of 100,
        # hence the expected -1 below.
        data = np.array([-500, -220, -10])

        # Run
        # The builtin ``float`` is used instead of ``np.float``: the latter
        # was only an alias of the builtin and was removed in NumPy 1.24.
        transformer = NumericalTransformer(
            dtype=float,
            nan='nan',
            rounding='auto',
        )
        transformer.fit(data)

        # Asserts
        assert transformer._rounding_digits == -1
예제 #12
0
    def test_fit_rounding_none(self):
        """Test ``fit`` with ``rounding`` set to ``None``.

        If the rounding parameter is set to ``None``, the ``fit`` method
        should not set its ``rounding`` or ``_rounding_digits`` instance
        variables.

        Input:
        - An array with floats rounded to one decimal and a None value
        Side Effect:
        - ``rounding`` and ``_rounding_digits`` continue to be ``None``
        """
        # Setup
        data = np.array([1.5, None, 2.5])

        # Run
        # The builtin ``float`` is used instead of ``np.float``: the latter
        # was only an alias of the builtin and was removed in NumPy 1.24.
        transformer = NumericalTransformer(
            dtype=float,
            nan='nan',
            rounding=None,
        )
        transformer.fit(data)

        # Asserts
        assert transformer.rounding is None
        assert transformer._rounding_digits is None