def test_set_params(self, mock_open, mock_load_model, mock_base_processor):
    """Exercise ``set_params`` on a default ``UnstructuredDataLabeler``.

    Checks, in order:

    1. ``None``, an empty dict and a dict with an unknown key each raise
       ``ValueError`` carrying the "params dict must have the following
       format" message.
    2. A dict with ``preprocessor``/``model``/``postprocessor`` sub-dicts is
       accepted when every mocked component reports no existing parameters.
    3. A ``RuntimeWarning`` is emitted when the mocked preprocessor and
       model report different values for the shared ``test`` parameter.
    4. ``ValueError`` is raised when parameters are supplied for a pipeline
       component that has been set to ``None``.

    Args:
        mock_open: Not used in the body; presumably injected by a patch
            decorator that is outside this view.
        mock_load_model: Mock forwarded to ``self._setup_mock_load_model``.
        mock_base_processor: Mock forwarded to
            ``self._setup_mock_load_processor``.
    """
    self._setup_mock_load_model(mock_load_model)
    self._setup_mock_load_processor(mock_base_processor)

    # load default
    data_labeler = UnstructuredDataLabeler()

    # Every expected message is spelled once and passed through re.escape so
    # that its dots, parentheses and backticks are matched literally instead
    # of being interpreted as regex syntax.
    bad_format_msg = re.escape(
        "The params dict must have the following "
        "format:\nparams=dict(preprocessor=dict(..."
        "), model=dict(...), postprocessor=dict(..."
        ")), where each sub-dict contains "
        "parameters of the specified data_labeler "
        "pipeline components."
    )
    mismatch_msg = re.escape(
        "Model and preprocessor value for `test` do "
        "not match. 2 != 1"
    )
    missing_component_msg = re.escape(
        "Parameters for the preprocessor, model, or"
        " postprocessor were specified when one or "
        "more of these were not set in the "
        "DataLabeler."
    )

    # check empty sent, then test if invalid key sent
    for bad_params in (None, {}, {"bad key": None}):
        with self.subTest(params=bad_params):
            with self.assertRaisesRegex(ValueError, bad_format_msg):
                data_labeler.set_params(bad_params)

    # validate no errors occur when correct params are sent
    data_labeler._preprocessor.get_parameters.return_value = dict()
    data_labeler._model.get_parameters.return_value = dict()
    data_labeler._postprocessor.get_parameters.return_value = dict()
    data_labeler.set_params({
        "preprocessor": {"test": 1},
        "model": {"test": 1},
        "postprocessor": {"test2": 3},
    })

    # validate warning on overlaps.
    # here we presume parameters are set as dict(test=1), dict(test=2)
    data_labeler._preprocessor.get_parameters.return_value = dict(test=1)
    data_labeler._model.get_parameters.return_value = dict(test=2)
    with self.assertWarnsRegex(RuntimeWarning, mismatch_msg):
        data_labeler.set_params({
            "preprocessor": {"test": 1},
            "model": {"test": 2},
        })

    # check if param sent for missing pipeline component
    # Components are removed cumulatively and in this fixed order, so each
    # iteration runs with all previously visited components already None.
    for component in ("preprocessor", "model", "postprocessor"):
        with self.subTest(component=component):
            setattr(data_labeler, "_" + component, None)
            with self.assertRaisesRegex(ValueError, missing_component_msg):
                data_labeler.set_params({component: {"test": 1}})
def test_set_params(self, mock_open, mock_load_model, mock_base_processor):
    """Verify how ``UnstructuredDataLabeler.set_params`` validates input.

    The test walks through four stages against a default labeler whose
    components are mocked:

    * malformed arguments (``None``, ``{}``, a dict with an unknown key)
      must raise ``ValueError`` with the "params dict must have the
      following format" message;
    * a well-formed dict must be accepted when each component's
      ``get_parameters`` returns an empty dict;
    * differing ``test`` values reported by the preprocessor and model
      mocks must produce a ``RuntimeWarning``;
    * supplying parameters for a component that has been set to ``None``
      must raise ``ValueError``.

    Args:
        mock_open: Not referenced in the body; presumably supplied by a
            patch decorator that is not visible here.
        mock_load_model: Mock passed on to ``self._setup_mock_load_model``.
        mock_base_processor: Mock passed on to
            ``self._setup_mock_load_processor``.
    """
    self._setup_mock_load_model(mock_load_model)
    self._setup_mock_load_processor(mock_base_processor)

    # load default
    data_labeler = UnstructuredDataLabeler()

    # Expected messages, defined once and escaped so that regex
    # metacharacters in them ('.', '(', ')') only match themselves.
    expected_format_error = re.escape(
        'The params dict must have the following '
        'format:\nparams=dict(preprocessor=dict(...'
        '), model=dict(...), postprocessor=dict(...'
        ')), where each sub-dict contains '
        'parameters of the specified data_labeler '
        'pipeline components.')
    expected_overlap_warning = re.escape(
        'Model and preprocessor value for `test` do '
        'not match. 2 != 1')
    expected_missing_error = re.escape(
        'Parameters for the preprocessor, model, or'
        ' postprocessor were specified when one or '
        'more of these were not set in the '
        'DataLabeler.')

    # check empty sent, then test if invalid key sent
    malformed_inputs = (None, {}, {'bad key': None})
    for malformed in malformed_inputs:
        with self.subTest(params=malformed):
            with self.assertRaisesRegex(ValueError, expected_format_error):
                data_labeler.set_params(malformed)

    # validate no errors occur when correct params are sent
    data_labeler._preprocessor.get_parameters.return_value = dict()
    data_labeler._model.get_parameters.return_value = dict()
    data_labeler._postprocessor.get_parameters.return_value = dict()
    data_labeler.set_params({
        'preprocessor': {'test': 1},
        'model': {'test': 1},
        'postprocessor': {'test2': 3}
    })

    # validate warning on overlaps.
    # here we presume parameters are set as dict(test=1), dict(test=2)
    data_labeler._preprocessor.get_parameters.return_value = dict(test=1)
    data_labeler._model.get_parameters.return_value = dict(test=2)
    with self.assertWarnsRegex(RuntimeWarning, expected_overlap_warning):
        data_labeler.set_params({
            'preprocessor': {'test': 1},
            'model': {'test': 2}
        })

    # check if param sent for missing pipeline component
    # The order matters: each component stays None for later iterations,
    # so the labeler loses its components one after another.
    for name in ('preprocessor', 'model', 'postprocessor'):
        with self.subTest(component=name):
            setattr(data_labeler, '_' + name, None)
            with self.assertRaisesRegex(ValueError, expected_missing_error):
                data_labeler.set_params({name: {'test': 1}})