Exemple #1
0
    def test_set_params(self, mock_open, mock_load_model, mock_base_processor):
        """Exercise ``set_params`` validation on an UnstructuredDataLabeler.

        Walks through four scenarios in order: malformed params (``None``,
        an empty dict, an unknown key), a well-formed params dict, a
        preprocessor/model value mismatch that must emit a RuntimeWarning,
        and params addressed to pipeline components that were set to None.
        """
        self._setup_mock_load_model(mock_load_model)
        self._setup_mock_load_processor(mock_base_processor)

        # default-constructed labeler backed by the mocked loaders
        labeler = UnstructuredDataLabeler()

        # every malformed input is expected to produce this same message
        format_error = re.escape(
            "The params dict must have the following "
            "format:\nparams=dict(preprocessor=dict(..."
            "), model=dict(...), postprocessor=dict(..."
            ")), where each sub-dict contains "
            "parameters of the specified data_labeler "
            "pipeline components."
        )
        # nothing sent, empty dict sent, invalid key sent
        for bad_params in (None, {}, {"bad key": None}):
            with self.assertRaisesRegex(ValueError, format_error):
                labeler.set_params(bad_params)

        # correct params must go through without raising
        labeler._preprocessor.get_parameters.return_value = {}
        labeler._model.get_parameters.return_value = {}
        labeler._postprocessor.get_parameters.return_value = {}

        valid_params = {
            "preprocessor": {"test": 1},
            "model": {"test": 1},
            "postprocessor": {"test2": 3},
        }
        labeler.set_params(valid_params)

        # overlapping parameter names with differing values should warn;
        # the mocks report the values as dict(test=1) and dict(test=2)
        labeler._preprocessor.get_parameters.return_value = {"test": 1}
        labeler._model.get_parameters.return_value = {"test": 2}
        mismatch_warning = (
            "Model and preprocessor value for `test` do "
            "not match. 2 != 1"
        )
        with self.assertWarnsRegex(RuntimeWarning, mismatch_warning):
            labeler.set_params({"preprocessor": {"test": 1},
                                "model": {"test": 2}})

        # params for a component that is no longer set must be rejected;
        # components are removed one at a time and stay removed
        missing_error = (
            "Parameters for the preprocessor, model, or"
            " postprocessor were specified when one or "
            "more of these were not set in the "
            "DataLabeler."
        )
        for component in ("preprocessor", "model", "postprocessor"):
            setattr(labeler, "_" + component, None)
            with self.assertRaisesRegex(ValueError, missing_error):
                labeler.set_params({component: {"test": 1}})
Exemple #2
0
    def test_set_params(self, mock_open, mock_load_model, mock_base_processor):
        """Check how ``UnstructuredDataLabeler.set_params`` validates input.

        Steps, in order: invalid params are rejected with a ValueError,
        valid params are accepted, differing `test` values between the
        preprocessor and model trigger a RuntimeWarning, and params for a
        component that has been set to None raise a ValueError.
        """
        self._setup_mock_load_model(mock_load_model)
        self._setup_mock_load_processor(mock_base_processor)

        # load default
        dl = UnstructuredDataLabeler()

        # shared expectation for all malformed-params cases
        bad_format_regex = re.escape(
            'The params dict must have the following '
            'format:\nparams=dict(preprocessor=dict(...'
            '), model=dict(...), postprocessor=dict(...'
            ')), where each sub-dict contains '
            'parameters of the specified data_labeler '
            'pipeline components.')

        # empty values first, then a dict holding an invalid key
        malformed_inputs = [None, {}, {'bad key': None}]
        for candidate in malformed_inputs:
            with self.assertRaisesRegex(ValueError, bad_format_regex):
                dl.set_params(candidate)

        # validate no errors occur when correct params are sent
        for part in (dl._preprocessor, dl._model, dl._postprocessor):
            part.get_parameters.return_value = dict()

        dl.set_params({
            'preprocessor': {'test': 1},
            'model': {'test': 1},
            'postprocessor': {'test2': 3},
        })

        # validate warning on overlaps: the mocked components now report
        # test=1 (preprocessor) and test=2 (model)
        dl._preprocessor.get_parameters.return_value = dict(test=1)
        dl._model.get_parameters.return_value = dict(test=2)
        overlap_regex = ('Model and preprocessor value for `test` do '
                         'not match. 2 != 1')
        with self.assertWarnsRegex(RuntimeWarning, overlap_regex):
            dl.set_params({
                'preprocessor': {'test': 1},
                'model': {'test': 2},
            })

        # check if param sent for missing pipeline component
        unset_component_regex = ('Parameters for the preprocessor, model, or'
                                 ' postprocessor were specified when one or '
                                 'more of these were not set in the '
                                 'DataLabeler.')

        dl._preprocessor = None
        with self.assertRaisesRegex(ValueError, unset_component_regex):
            dl.set_params({'preprocessor': {'test': 1}})

        dl._model = None
        with self.assertRaisesRegex(ValueError, unset_component_regex):
            dl.set_params({'model': {'test': 1}})

        dl._postprocessor = None
        with self.assertRaisesRegex(ValueError, unset_component_regex):
            dl.set_params({'postprocessor': {'test': 1}})