Beispiel #1
0
    def test__prepare_constraints_validates_constraint_order(
            self, from_dict_mock):
        """Check that ``_prepare_constraints`` accepts a valid constraint order.

        None of the constraints has ``rebuild_columns`` that show up in the
        ``constraint_columns`` of a constraint placed after it, so the call
        must complete without raising.

        Setup:
        - ``from_dict_mock`` hands back the four constraints, in order.
        Input:
        - Two ``reject_sampling`` constraints followed by two ``transform``
        constraints whose columns do not conflict.
        Output:
        - A list equal to the input list.
        """
        # Setup
        rejecting = [Constraint(handling_strategy='reject_sampling') for _ in range(2)]
        first_transform = Constraint(handling_strategy='transform')
        second_transform = Constraint(handling_strategy='transform')
        constraints = [*rejecting, first_transform, second_transform]

        first_transform.rebuild_columns = ['e', 'd']
        second_transform.constraint_columns = ['a', 'b', 'c']
        second_transform.rebuild_columns = ['a']
        from_dict_mock.side_effect = list(constraints)

        # Run
        result = Table._prepare_constraints(constraints)

        # Assert
        assert result == constraints
Beispiel #2
0
    def test__prepare_constraints_invalid_order_raises_exception(
            self, from_dict_mock):
        """Check that ``_prepare_constraints`` rejects an invalid constraint order.

        The third constraint rebuilds column ``'a'``, which is also one of the
        ``constraint_columns`` of the fourth constraint, so the call must raise.

        Setup:
        - ``from_dict_mock`` hands back the four constraints, in order.
        Input:
        - Constraints where an earlier one has ``rebuild_columns`` overlapping
        a later one's ``constraint_columns``.
        Side Effect:
        - An ``Exception`` is raised.
        """
        # Setup
        rejecting = [Constraint(handling_strategy='reject_sampling') for _ in range(2)]
        first_transform = Constraint(handling_strategy='transform')
        second_transform = Constraint(handling_strategy='transform')
        constraints = [*rejecting, first_transform, second_transform]

        first_transform.rebuild_columns = ['a', 'd']
        second_transform.constraint_columns = ['a', 'b', 'c']
        second_transform.rebuild_columns = ['a']
        from_dict_mock.side_effect = list(constraints)

        # Run / Assert
        with pytest.raises(Exception):
            Table._prepare_constraints(constraints)
Beispiel #3
0
    def test__prepare_constraints_sorts_constraints_none_rebuild_columns(
            self, from_dict_mock):
        """Check that constraints with ``rebuild_columns=None`` are moved to the front.

        Setup:
        - ``from_dict_mock`` hands back the three constraints, in order.
        Input:
        - Two ``transform`` constraints with ``rebuild_columns`` followed by a
        ``reject_sampling`` constraint whose ``rebuild_columns`` is ``None``.
        Output:
        - The ``None`` constraint first, then the other two in their original order.
        """
        # Setup
        rebuilds_a = Constraint(handling_strategy='transform')
        rebuilds_b = Constraint(handling_strategy='transform')
        rebuilds_nothing = Constraint(handling_strategy='reject_sampling')
        rebuilds_a.rebuild_columns = ['a']
        rebuilds_b.rebuild_columns = ['b']
        rebuilds_nothing.rebuild_columns = None

        constraints = [rebuilds_a, rebuilds_b, rebuilds_nothing]
        from_dict_mock.side_effect = list(constraints)

        # Run
        result = Table._prepare_constraints(constraints)

        # Assert
        expected_order = [rebuilds_nothing, rebuilds_a, rebuilds_b]
        assert result == expected_order
Beispiel #4
0
    def test__validate_data_meets_constraints_invalid_input(self):
        """Check that ``_validate_data_meets_constraint`` reports invalid rows.

        All constraint columns are present in the data and the mocked
        ``is_valid`` flags six of the eight rows as invalid.

        Input:
        - Table data for which ``is_valid`` returns ``False`` on some rows.
        Side Effects:
        - A ``ConstraintsNotMetError`` is raised whose message matches the
        expected text: five invalid rows followed by ``+1 more``.
        """
        # Setup
        row_count = 8
        data = pd.DataFrame(
            {
                'a': list(range(row_count)),
                'b': list(range(3, 3 + row_count)),
            },
            index=list(range(row_count)))
        validity = pd.Series([True, False, True] + [False] * 5)

        constraint = Constraint()
        constraint.constraint_columns = ['a', 'b']
        constraint.is_valid = Mock(return_value=validity)

        # The message is escaped because ``match`` treats it as a regular expression.
        expected_message = re.escape(
            "Data is not valid for the 'Constraint' constraint:\n   "
            'a  b\n1  1  4\n3  3  6\n4  4  7\n5  5  8\n6  6  9'
            '\n+1 more')

        # Run / Assert
        with pytest.raises(ConstraintsNotMetError, match=expected_message):
            constraint._validate_data_meets_constraint(data)
Beispiel #5
0
    def test_fit_trains_column_model(self, ht_mock, gm_mock):
        """Test the ``Constraint.fit`` method trains the column model.

        The constraint is built with ``fit_columns_model=True`` and two
        ``constraint_columns`` before ``fit`` is called.

        ``ht_mock`` and ``gm_mock`` are injected by patch decorators that are not
        visible here; presumably they replace the hyper transformer and the
        multivariate model classes used by ``fit`` (confirm against the decorators).

        Input:
        - Table data (pandas.DataFrame)
        Side Effects:
        - ``gm_mock.return_value.fit`` is called exactly once.
        - ``ht_mock.return_value.fit_transform`` is called exactly once, with the
        table data as its first positional argument.
        """
        # Setup
        table_data = pd.DataFrame({'a': [1, 2, 3], 'b': [4, 5, 6]})
        instance = Constraint(handling_strategy='transform',
                              fit_columns_model=True)
        instance.constraint_columns = ('a', 'b')

        # Run
        instance.fit(table_data)

        # Assert
        gm_mock.return_value.fit.assert_called_once()
        calls = ht_mock.return_value.fit_transform.mock_calls
        # Check the call count before indexing so that a missing call is reported
        # as an assertion failure rather than as an ``IndexError``.
        assert len(calls) == 1
        args = calls[0][1]
        pd.testing.assert_frame_equal(args[0], table_data)
Beispiel #6
0
    def test_transform__transform_errors(self):
        """Check that ``transform`` lets errors from ``_transform`` surface.

        Setup:
            - ``_transform`` is replaced with a mock that raises ``Exception``.

        Input:
            - ``pandas.DataFrame``.

        Side effects:
            - An ``Exception`` is raised by ``transform``.
        """
        # Setup
        data = pd.DataFrame({'a': [1, 2, 3]})
        instance = Constraint()
        instance._transform = Mock(side_effect=Exception())

        # Run / Assert
        with pytest.raises(Exception):
            instance.transform(data)
Beispiel #7
0
    def test_transform_model_enabled_reject_sampling_error(self):
        """Check that ``Constraint.transform`` fails when no valid rows can be sampled.

        Setup:
        - ``_columns_model.sample`` always returns an empty ``DataFrame``.
        - ``_hyper_transformer.transform`` returns a one-row frame with column ``b``
        and ``reverse_transform`` returns an empty ``DataFrame``.
        Input:
        - Table that lacks constraint column ``a``.
        Side Effect:
        - ``ValueError`` raised.
        """
        # Setup
        columns_model = Mock()
        columns_model.sample.return_value = pd.DataFrame()

        hyper_transformer = Mock()
        hyper_transformer.transform.return_value = pd.DataFrame([[1]], columns=['b'])
        hyper_transformer.reverse_transform.return_value = pd.DataFrame()

        instance = Constraint(handling_strategy='transform')
        instance.constraint_columns = ('a', 'b')
        instance._hyper_transformer = hyper_transformer
        instance._columns_model = columns_model

        data = pd.DataFrame([[1, 2], [3, 4]], columns=['b', 'c'])

        # Run / Assert
        with pytest.raises(ValueError):
            instance.transform(data)
Beispiel #8
0
    def test_transform_model_enabled_some_columns_missing(self):
        """Check that ``Constraint.transform`` fills missing columns via the column model.

        Setup:
        - ``_transform`` is an identity function.
        - ``_columns_model.sample`` always returns the same one-row frame.
        - ``_hyper_transformer.transform`` and ``reverse_transform`` return
        pre-built frames, one per call.
        Input:
        - Two-row table that lacks constraint column ``a``.
        Output:
        - Frame with columns ``a``, ``b`` and ``c`` built from the
        ``reverse_transform`` results.
        Side Effects:
        - ``sample`` is called twice, once per condition on ``b``.
        - ``reverse_transform`` receives the sampled frame on both calls.
        """
        # Setup
        sampled = pd.DataFrame([[1, 2, 3]], columns=['b', 'c', 'a'])
        reversed_frames = [
            pd.DataFrame([[5, 1, 2]], columns=['a', 'b', 'c']),
            pd.DataFrame([[6, 3, 4]], columns=['a', 'b', 'c']),
        ]
        condition_frames = [
            pd.DataFrame([[1]], columns=['b']),
            pd.DataFrame([[3]], columns=['b']),
        ]

        instance = Constraint(handling_strategy='transform')
        instance._transform = lambda x: x
        instance.constraint_columns = ('a', 'b')
        instance._hyper_transformer = Mock()
        instance._columns_model = Mock()
        instance._columns_model.sample.return_value = sampled
        instance._hyper_transformer.transform.side_effect = condition_frames
        instance._hyper_transformer.reverse_transform.side_effect = reversed_frames

        # Run
        data = pd.DataFrame([[1, 2], [3, 4]], columns=['b', 'c'])
        transformed_data = instance.transform(data)

        # Assert
        expected_sampled = pd.DataFrame([[1, 2, 3]], columns=['b', 'c', 'a'])
        expected_result = pd.DataFrame([[5, 1, 2], [6, 3, 4]],
                                       columns=['a', 'b', 'c'])

        sample_mock = instance._columns_model.sample
        assert len(sample_mock.mock_calls) == 2
        for condition_value in (1, 3):
            sample_mock.assert_any_call(num_rows=1,
                                        conditions={'b': condition_value})

        reverse_calls = instance._hyper_transformer.reverse_transform.mock_calls
        for position in (0, 1):
            # Each call entry is ``(name, args, kwargs)``; take the first positional arg.
            pd.testing.assert_frame_equal(reverse_calls[position][1][0],
                                          expected_sampled)

        pd.testing.assert_frame_equal(transformed_data, expected_result)
Beispiel #9
0
    def test___init___not_kown(self):
        """Check that ``Constraint.__init__`` rejects an unknown ``handling_strategy``.

        Input:
            - ``'not_known'`` as the handling strategy
        Side effects:
            - ValueError is raised
        """
        # Setup
        unknown_strategy = 'not_known'

        # Run / Assert
        with pytest.raises(ValueError):
            Constraint(handling_strategy=unknown_strategy)
Beispiel #10
0
    def test_fit(self):
        """Smoke-test the ``Constraint.fit`` interface.

        The test only calls ``fit`` on a ``transform`` constraint and makes no
        assertions; it passes as long as the call does not raise.

        Input:
        - Table data (pandas.DataFrame)
        """
        # Setup
        instance = Constraint(handling_strategy='transform')
        frame = pd.DataFrame({'a': [1, 2, 3]})

        # Run
        instance.fit(frame)
Beispiel #11
0
    def test_transform_invalid_table_data(self):
        """Test the ``Constraint.transform`` method with invalid ``table_data``.

        The constraint expects column ``a`` but receives an empty ``DataFrame``.

        The ``Constraint.transform`` method is expected to:
        - Raise ``MissingConstraintColumnError``.
        """
        # Setup
        instance = Constraint(handling_strategy='transform')
        instance._transform = lambda x: x
        # A one-element tuple needs the trailing comma; ``('a')`` is just the
        # string ``'a'``.
        instance._constraint_columns = ('a', )

        # Run / Assert
        with pytest.raises(MissingConstraintColumnError):
            instance.transform(pd.DataFrame())
Beispiel #12
0
    def test__identity(self):
        """Check that ``Constraint._identity`` returns its argument.

        Input:
            - The string ``'input'``
        Output:
            - A value equal to the input
        """
        # Setup
        instance = Constraint('all')

        # Run
        returned = instance._identity('input')

        # Assert
        assert returned == 'input'
Beispiel #13
0
    def test_transform(self):
        """Check that the base ``Constraint.transform`` returns its input.

        The constraint is created with ``handling_strategy='transform'`` and
        ``transform`` is called with a plain string.

        Input:
        - The string ``'input'``
        Output:
        - A value equal to the input
        """
        # Setup
        instance = Constraint(handling_strategy='transform')

        # Run
        returned = instance.transform('input')

        # Assert
        assert returned == 'input'
Beispiel #14
0
    def test_transform_all_columns_missing(self):
        """Check ``Constraint.transform`` when every constraint column is absent.

        The constraint expects column ``a`` and the input is an empty ``DataFrame``.

        The ``Constraint.transform`` method is expected to:
        - Raise ``MissingConstraintColumnError``.
        """
        # Setup
        empty_table = pd.DataFrame()
        instance = Constraint()
        instance.constraint_columns = ('a', )
        instance._transform = lambda x: x

        # Run / Assert
        with pytest.raises(MissingConstraintColumnError):
            instance.transform(empty_table)
Beispiel #15
0
    def test_transform_columns_missing(self):
        """Check ``Constraint.transform`` when the constraint column is not in the data.

        The constraint expects column ``a`` but the table only has ``b`` and ``c``.

        The ``Constraint.transform`` method is expected to:
        - Raise ``MissingConstraintColumnError``.
        """
        # Setup
        table_without_a = pd.DataFrame([[1, 2], [3, 4]], columns=['b', 'c'])
        instance = Constraint()
        instance.constraint_columns = ('a', )
        instance._transform = lambda x: x

        # Run / Assert
        with pytest.raises(MissingConstraintColumnError):
            instance.transform(table_without_a)
Beispiel #16
0
    def test_transform_model_enabled_all_columns_missing(self):
        """Check ``Constraint.transform`` when all constraint columns are missing.

        The constraint is created with ``handling_strategy='transform'`` and the
        remaining constructor arguments left at their defaults. It expects column
        ``a`` and receives an empty ``DataFrame``.

        The ``Constraint.transform`` method is expected to:
        - Raise ``MissingConstraintColumnError``.
        """
        # Setup
        empty_table = pd.DataFrame()
        instance = Constraint(handling_strategy='transform')
        instance.constraint_columns = ('a', )
        instance._transform = lambda x: x

        # Run / Assert
        with pytest.raises(MissingConstraintColumnError):
            instance.transform(empty_table)
Beispiel #17
0
    def test_transform_model_enabled_reject_sampling_duplicates_valid_rows(
            self):
        """Check the fallback that repeats valid rows when sampling falls short.

        Setup:
        - ``_columns_model.sample`` returns two rows on its first call and an
        empty ``DataFrame`` on each of the next 100 calls.
        - ``_transform`` and ``_hyper_transformer.reverse_transform`` are
        identity functions.
        Input:
        - Five-row table that lacks constraint column ``a``.
        Output:
        - Five rows built by cycling through the two sampled rows.
        Side Effects:
        - ``sample`` is called 101 times in total.
        """
        # Setup
        def passthrough(x):
            return x

        valid_rows = pd.DataFrame([[1, 2], [1, 3]], columns=['a', 'b'])
        no_rows = pd.DataFrame()
        condition_frames = [
            pd.DataFrame([[1], [1], [1], [1], [1]], columns=['b'])
        ]

        instance = Constraint(handling_strategy='transform')
        instance._transform = passthrough
        instance.constraint_columns = ('a', 'b')
        instance._hyper_transformer = Mock()
        instance._columns_model = Mock()
        instance._columns_model.sample.side_effect = [valid_rows] + [no_rows] * 100
        instance._hyper_transformer.transform.side_effect = condition_frames
        instance._hyper_transformer.reverse_transform = passthrough

        # Run
        data = pd.DataFrame([[1], [1], [1], [1], [1]], columns=['b'])
        transformed_data = instance.transform(data)

        # Assert
        sample_mock = instance._columns_model.sample
        assert len(sample_mock.mock_calls) == 101
        sample_mock.assert_any_call(num_rows=5, conditions={'b': 1})

        expected_result = pd.DataFrame(
            [[1, 2], [1, 3], [1, 2], [1, 3], [1, 2]], columns=['a', 'b'])
        pd.testing.assert_frame_equal(transformed_data, expected_result)
Beispiel #18
0
    def test_transform_model_enabled_reject_sampling(self):
        """Check that ``Constraint.transform`` keeps sampling until it has enough rows.

        Setup:
        - ``_columns_model.sample`` returns two rows on the first call and four
        rows on the second.
        - ``_transform`` and ``_hyper_transformer.reverse_transform`` are
        identity functions.
        Input:
        - Five-row table that lacks constraint column ``a``.
        Output:
        - The first five sampled rows, in sampling order.
        Side Effects:
        - ``sample`` is called twice; the second call asks for more than 3 rows.
        """
        # Setup
        def passthrough(x):
            return x

        first_batch = pd.DataFrame([[1, 2], [1, 3]], columns=['a', 'b'])
        second_batch = pd.DataFrame([[1, 4], [1, 5], [1, 6], [1, 7]],
                                    columns=['a', 'b'])
        condition_frames = [
            pd.DataFrame([[1], [1], [1], [1], [1]], columns=['b'])
        ]

        instance = Constraint(handling_strategy='transform')
        instance._transform = passthrough
        instance.constraint_columns = ('a', 'b')
        instance._hyper_transformer = Mock()
        instance._columns_model = Mock()
        instance._columns_model.sample.side_effect = [first_batch, second_batch]
        instance._hyper_transformer.transform.side_effect = condition_frames
        instance._hyper_transformer.reverse_transform = passthrough

        # Run
        data = pd.DataFrame([[1], [1], [1], [1], [1]], columns=['b'])
        transformed_data = instance.transform(data)

        # Assert
        sample_mock = instance._columns_model.sample
        recorded_calls = sample_mock.mock_calls
        assert len(recorded_calls) == 2
        sample_mock.assert_any_call(num_rows=5, conditions={'b': 1})

        # Index 2 of a recorded call holds its keyword arguments.
        second_call_kwargs = recorded_calls[1][2]
        assert second_call_kwargs['num_rows'] > 3

        expected_result = pd.DataFrame(
            [[1, 2], [1, 3], [1, 4], [1, 5], [1, 6]], columns=['a', 'b'])
        pd.testing.assert_frame_equal(transformed_data, expected_result)
Beispiel #19
0
    def test___init___all(self):
        """Check ``Constraint.__init__`` with the ``'all'`` handling strategy.

        With ``'all'`` none of ``filter_valid``, ``transform`` and
        ``reverse_transform`` should compare equal to ``_identity``.

        Input:
            - all
        Side effects:
            - filter_valid != identity
            - transform != identity
            - reverse_transform != identity
        """
        # Run
        instance = Constraint(handling_strategy='all')

        # Asserts
        identity = instance._identity
        assert instance.filter_valid != identity
        assert instance.transform != identity
        assert instance.reverse_transform != identity
Beispiel #20
0
    def test___init___reject_sampling(self):
        """Check ``Constraint.__init__`` with the ``'reject_sampling'`` handling strategy.

        With ``'reject_sampling'`` both ``transform`` and ``reverse_transform``
        should compare equal to ``_identity`` while ``filter_valid`` should not.

        Input:
            - reject_sampling
        Side effects:
            - filter_valid != identity
            - transform == identity
            - reverse_transform == identity
        """
        # Run
        instance = Constraint(handling_strategy='reject_sampling')

        # Asserts
        identity = instance._identity
        assert instance.filter_valid != identity
        assert instance.transform == identity
        assert instance.reverse_transform == identity
Beispiel #21
0
    def test_fit(self):
        """Check that ``Constraint.fit`` delegates to ``_fit``.

        The constraint is created with ``fit_columns_model=False`` and its
        ``_fit`` method is replaced by a mock.

        Input:
        - Table data (pandas.DataFrame)
        Side Effects:
        - ``_fit`` is called exactly once with the table data.
        """
        # Setup
        fit_mock = Mock()
        instance = Constraint(handling_strategy='transform',
                              fit_columns_model=False)
        instance._fit = fit_mock
        table_data = pd.DataFrame({'a': [1, 2, 3]})

        # Run
        instance.fit(table_data)

        # Assert
        fit_mock.assert_called_once_with(table_data)
Beispiel #22
0
    def test_fit_gaussian_multivariate_correct_distribution(self, gm_mock):
        """Check how ``Constraint.fit`` instantiates the multivariate model.

        ``gm_mock`` is injected by a patch decorator that is not visible here. After
        fitting a constraint with ``fit_columns_model=True`` and two constraint
        columns, the mock must have been called once with
        ``distribution=GaussianUnivariate``.

        Input:
        - Table data (pandas.DataFrame)
        """
        # Setup
        instance = Constraint(handling_strategy='transform',
                              fit_columns_model=True)
        instance.constraint_columns = ('a', 'b')
        table_data = pd.DataFrame({'a': [1, 2, 3], 'b': [1, 2, 3]})

        # Run
        instance.fit(table_data)

        # Assert
        gm_mock.assert_called_once_with(distribution=GaussianUnivariate)
Beispiel #23
0
    def test_reverse_transform(self):
        """Check the default behaviour of ``Constraint.reverse_transform``.

        The ``Constraint.reverse_transform`` method is expected to:
            - Return a frame equal to the input.
            - Return a different object than the one passed in.

        Input:
            - An empty ``pandas.DataFrame``
        Output:
            - An empty ``pandas.DataFrame`` that is not the input object
        """
        # Setup
        data = pd.DataFrame()
        instance = Constraint()

        # Run
        output = instance.reverse_transform(data)

        # Assert
        assert output is not data
        pd.testing.assert_frame_equal(output, pd.DataFrame())
Beispiel #24
0
    def test_fit(self):
        """Check that ``Constraint.fit`` calls ``_fit`` and validates the data.

        Both ``_fit`` and ``_validate_data_meets_constraint`` are replaced by mocks.

        Input:
        - Table data (pandas.DataFrame)
        Side Effects:
        - ``_fit`` is called exactly once with the table data.
        - ``_validate_data_meets_constraint`` is called exactly once with the
        table data.
        """
        # Setup
        fit_mock = Mock()
        validate_mock = Mock()
        instance = Constraint()
        instance._fit = fit_mock
        instance._validate_data_meets_constraint = validate_mock
        table_data = pd.DataFrame({'a': [1, 2, 3]})

        # Run
        instance.fit(table_data)

        # Assert
        fit_mock.assert_called_once_with(table_data)
        validate_mock.assert_called_once_with(table_data)
Beispiel #25
0
    def test_is_valid(self):
        """Check the default behaviour of ``Constraint.is_valid``.

        The base ``Constraint.is_valid`` method is expected to:
        - Mark every row of the given table as valid.

        Input:
        - Table data (pandas.DataFrame)
        Output:
        - Series of ``True`` values (pandas.Series)
        """
        # Setup
        instance = Constraint(handling_strategy='transform')
        table_data = pd.DataFrame({'a': [1, 2, 3]})

        # Run
        validity = instance.is_valid(table_data)

        # Assert
        all_valid = pd.Series([True, True, True])
        pd.testing.assert_series_equal(all_valid, validity)
Beispiel #26
0
    def test__validate_data_meets_constraints_missing_cols(self):
        """Check ``_validate_data_meets_constraint`` when a constraint column is absent.

        The constraint lists columns ``a``, ``b`` and ``c`` while the data only
        has ``a`` and ``b``, so validation is expected to be skipped.

        Input:
        - Table data that is missing a constraint column
        Output:
        - None
        Side Effects:
        - No error, and ``is_valid`` is never called
        """
        # Setup
        is_valid_mock = Mock()
        constraint = Constraint()
        constraint.constraint_columns = ['a', 'b', 'c']
        constraint.is_valid = is_valid_mock
        data = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]}, index=[0, 1, 2])

        # Run
        constraint._validate_data_meets_constraint(data)

        # Assert
        is_valid_mock.assert_not_called()
Beispiel #27
0
    def test__validate_data_meets_constraints(self):
        """Check ``_validate_data_meets_constraint`` when all constraint columns exist.

        The data contains both constraint columns, so ``is_valid`` is expected
        to be consulted.

        Input:
        - Table data
        Output:
        - None
        Side Effects:
        - No error, and ``is_valid`` is called exactly once with the data
        """
        # Setup
        is_valid_mock = Mock()
        constraint = Constraint()
        constraint.constraint_columns = ['a', 'b']
        constraint.is_valid = is_valid_mock
        data = pd.DataFrame({'a': [0, 1, 2], 'b': [3, 4, 5]}, index=[0, 1, 2])

        # Run
        constraint._validate_data_meets_constraint(data)

        # Assert
        is_valid_mock.assert_called_once_with(data)
Beispiel #28
0
    def test_transform(self):
        """Check the default behaviour of ``Constraint.transform``.

        The ``Constraint.transform`` method is expected to:
            - Return a frame equal to the input.
            - Return a different object than the one passed in.

        Input:
            - a DataFrame

        Output:
            - An equal DataFrame that is not the input object
        """
        # Setup
        data = pd.DataFrame({'col': ['input']})
        instance = Constraint()

        # Run
        output = instance.transform(data)

        # Assert
        assert output is not data
        expected = pd.DataFrame({'col': ['input']})
        pd.testing.assert_frame_equal(output, expected)